In [16]:
!pip install openpyxl
Requirement already satisfied: openpyxl in c:\users\hp\appdata\local\anaconda3\lib\site-packages (3.1.5)
Requirement already satisfied: et-xmlfile in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from openpyxl) (1.1.0)

Week 3 Started from here

In [ ]:
import pandas as pd
import os
from pathlib import Path
import re

# Define folder paths
folders = {
    "ReviewVs": r"C:\Users\hp\Desktop\WEEK 3_DSHub\Reviewvs",
    "Stats_Crashes": r"C:\Users\hp\Desktop\WEEK 3_DSHub\Stats_Crashes",
    "Stats_Install": r"C:\Users\hp\Desktop\WEEK 3_DSHub\Stats_Install",
    "Stats_Ratings": r"C:\Users\hp\Desktop\WEEK 3_DSHub\Stats_Ratings",
    "Supported_devices": r"C:\Users\hp\Desktop\WEEK 3_DSHub\Supported_devices"
}

# Dictionary to store merged dataframes
merged_dataframes = {}

# Process each folder
for folder_name, folder_path in folders.items():
    print(f"\n{'='*60}")
    print(f"Processing: {folder_name}")
    print(f"{'='*60}")
    
    # Check if folder exists
    if not os.path.exists(folder_path):
        print(f"  Folder not found: {folder_path}")
        continue
    
    # Get all CSV files in the folder
    csv_files = [f for f in os.listdir(folder_path) if f.endswith('.csv')]
    
    if not csv_files:
        print(f"  No CSV files found in {folder_name}")
        continue
    
    print(f"Found {len(csv_files)} CSV file(s)")
    
    # Read and clean each CSV file
    dataframes = []
    for csv_file in csv_files:
        file_path = os.path.join(folder_path, csv_file)
        try:
            # Try reading with Python engine and error handling
            try:
                df = pd.read_csv(file_path, encoding='utf-8', engine='python', on_bad_lines='skip', quoting=3)
            except:
                try:
                    df = pd.read_csv(file_path, encoding='latin-1', engine='python', on_bad_lines='skip', quoting=3)
                except:
                    # Last resort: read with maximum flexibility
                    df = pd.read_csv(file_path, encoding='utf-8', engine='python', on_bad_lines='skip')
            
            # Remove completely empty rows
            df = df.dropna(how='all')
            
            # Clean column names (strip whitespace)
            df.columns = df.columns.str.strip()
            
            # Handle any comma-separated values in cells
            df = df.apply(lambda x: x.str.strip() if x.dtype == "object" else x)
            
            if len(df) > 0:
                print(f"  ✓ Loaded: {csv_file} ({len(df)} rows, {len(df.columns)} columns)")
                dataframes.append(df)
            else:
                print(f"  ⚠ Skipped: {csv_file} (empty after cleaning)")
            
        except Exception as e:
            print(f"  ✗ Error processing {csv_file}: {str(e)}")
    
    # Merge all dataframes in the folder
    if dataframes:
        if len(dataframes) == 1:
            merged_df = dataframes[0]
        else:
            # Merge on index to combine all rows (concatenate)
            merged_df = pd.concat(dataframes, ignore_index=True, sort=False)
        
        # Clean the merged dataframe immediately
        merged_df = clean_dataframe(merged_df)
        
        merged_dataframes[folder_name] = merged_df
        print(f"\n✓ Merged data: {len(merged_df)} total rows, {len(merged_df.columns)} columns")
        print(f"Preview of {folder_name}:")
        print(merged_df.head())
    else:
        print(f"\n✗ No data to merge for {folder_name}")

# Export all merged dataframes to Excel
output_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged_Output.xlsx"

print(f"\n{'='*60}")
print("Exporting to Excel...")
print(f"{'='*60}")

def clean_text(x):
    if isinstance(x, str):
        # Remove byte order marks (BOM) and invisible characters
        x = x.replace("\ufeff", "").replace("ÿþ", "")
        # Remove control and illegal Excel characters
        x = re.sub(r"[\x00-\x1F\x7F-\x9F]", "", x)
        # Remove any remaining non-printable Unicode
        x = ''.join(ch for ch in x if ch.isprintable())
        return x.strip()
    return x

def clean_dataframe(df):
    """Clean dataframe using clean_text function"""
    # Apply clean_text to all column names
    df.columns = [clean_text(col) for col in df.columns]
    
    # Rename problematic columns
    df.columns = df.columns.str.replace('Package Name', 'Package_Name', regex=False)
    df.columns = df.columns.str.replace('Package name', 'Package_Name', regex=False)
    
    # Apply clean_text to all object columns
    for col in df.columns:
        if df[col].dtype == 'object':
            df[col] = df[col].apply(clean_text)
    
    return df

def get_safe_sheet_name(name, index):
    """Create a safe sheet name for Excel"""
    # Excel sheet names cannot exceed 31 characters and cannot contain: \ / ? * [ ] :
    safe_name = name.replace('\\', '').replace('/', '').replace('?', '').replace('*', '').replace('[', '').replace(']', '').replace(':', '')
    safe_name = safe_name.strip()[:31]
    return safe_name if safe_name else f"Sheet_{index}"

try:
    excel_max_rows = 1048576
    output_base = output_path.replace('.xlsx', '')
    file_count = 0
    
    with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
        for idx, (folder_name, df) in enumerate(merged_dataframes.items(), 1):
            df = clean_dataframe(df)
            
            # Check if data exceeds Excel row limit
            if len(df) > excel_max_rows:
                print(f"\n  ⚠ {folder_name} has {len(df)} rows (exceeds Excel limit of {excel_max_rows})")
                print(f"    Splitting into multiple files...")
                
                # Split into multiple files
                num_chunks = (len(df) // excel_max_rows) + 1
                for chunk_num in range(num_chunks):
                    start_row = chunk_num * excel_max_rows
                    end_row = (chunk_num + 1) * excel_max_rows
                    chunk_df = df.iloc[start_row:end_row]
                    
                    chunk_file = f"{output_base}_{folder_name}_Part{chunk_num + 1}.xlsx"
                    chunk_df.to_excel(chunk_file, sheet_name=f"Data_Pt{chunk_num + 1}", index=False, engine='openpyxl')
                    print(f"    ✓ Part {chunk_num + 1}: '{chunk_file}' ({len(chunk_df)} rows)")
                    file_count += 1
            else:
                # Use safe sheet name function
                sheet_name = get_safe_sheet_name(folder_name, idx)
                
                df.to_excel(writer, sheet_name=sheet_name, index=False)
                print(f"  ✓ Sheet {idx}: '{sheet_name}' ({len(df)} rows)")
                file_count += 1
    
    print(f"\n✓ Success! Files exported:")
    print(f"  Main file: {output_path}")
    if file_count > 5:
        print(f"  Plus {file_count - 1} additional split files for large datasets")
    
except Exception as e:
    print(f"\n✗ Error during export: {str(e)}")
    print("Attempting individual file export...")
    
    # Fallback: Export each folder to separate file
    try:
        for idx, (folder_name, df) in enumerate(merged_dataframes.items(), 1):
            df = clean_dataframe(df)
            
            excel_max_rows = 1048576
            if len(df) > excel_max_rows:
                print(f"\n  {folder_name}: {len(df)} rows - splitting...")
                num_chunks = (len(df) // excel_max_rows) + 1
                for chunk_num in range(num_chunks):
                    start_row = chunk_num * excel_max_rows
                    end_row = (chunk_num + 1) * excel_max_rows
                    chunk_df = df.iloc[start_row:end_row]
                    
                    chunk_file = output_path.replace('.xlsx', f'_{folder_name}_Part{chunk_num + 1}.xlsx')
                    chunk_df.to_excel(chunk_file, sheet_name='Data_Pt', index=False, engine='openpyxl')
                    print(f"    ✓ {chunk_file} ({len(chunk_df)} rows)")
            else:
                alt_output = output_path.replace('.xlsx', f'_{folder_name}.xlsx')
                sheet_name = get_safe_sheet_name(folder_name, idx)
                df.to_excel(alt_output, sheet_name=sheet_name, index=False, engine='openpyxl')
                print(f"  ✓ {alt_output} ({len(df)} rows)")
        
        print(f"\n✓ Alternative export completed!")
    except Exception as e2:
        print(f"\n✗ Export failed: {str(e2)}")
        print("Ensure openpyxl is installed: pip install openpyxl")

# Display summary
print(f"\n{'='*60}")
print("SUMMARY - 5 Merged Dataframes")
print(f"{'='*60}")
for idx, (folder_name, df) in enumerate(merged_dataframes.items(), 1):
    print(f"{idx}. {folder_name}")
    print(f"   Shape: {df.shape[0]} rows × {df.shape[1]} columns")
    print(f"   Columns: {', '.join(df.columns[:5])}{'...' if len(df.columns) > 5 else ''}\n")
In [3]:
import pandas as pd

# === 1️⃣ Read the CSV file ===
file_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\cleaned_Merged_Output_Stats_Ratings.csv"   # 🔁 change this to your file name
df = pd.read_csv(file_path)

# === 2️⃣ Convert 'Date' column to datetime ===
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

# Drop rows without valid dates
df = df.dropna(subset=['Date'])

# === 3️⃣ Sort data by date (from July 1 → July 31, 2024) ===
df = df.sort_values(by='Date', ascending=True)

# === 4️⃣ Save as a new sorted CSV ===
sorted_file = "July2024_Sorted.csv"
df.to_csv(sorted_file, index=False)

print("✅ Data sorted successfully by date and saved as 'July2024_Sorted.csv'")
✅ Data sorted successfully by date and saved as 'July2024_Sorted.csv'
In [8]:
!pip install textblob
Collecting textblob
  Downloading textblob-0.19.0-py3-none-any.whl.metadata (4.4 kB)
Requirement already satisfied: nltk>=3.9 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from textblob) (3.9.1)
Requirement already satisfied: click in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from nltk>=3.9->textblob) (8.1.8)
Requirement already satisfied: joblib in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from nltk>=3.9->textblob) (1.4.2)
Requirement already satisfied: regex>=2021.8.3 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from nltk>=3.9->textblob) (2024.11.6)
Requirement already satisfied: tqdm in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from nltk>=3.9->textblob) (4.67.1)
Requirement already satisfied: colorama in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from click->nltk>=3.9->textblob) (0.4.6)
Downloading textblob-0.19.0-py3-none-any.whl (624 kB)
   ---------------------------------------- 0.0/624.3 kB ? eta -:--:--
   ---------------------------------------- 0.0/624.3 kB ? eta -:--:--
   ---------------------------------------- 0.0/624.3 kB ? eta -:--:--
   ---------------------------------------- 0.0/624.3 kB ? eta -:--:--
   ---------------------------------------- 0.0/624.3 kB ? eta -:--:--
   ---------------------------------------- 0.0/624.3 kB ? eta -:--:--
   ---------------- ----------------------- 262.1/624.3 kB ? eta -:--:--
   ---------------- ----------------------- 262.1/624.3 kB ? eta -:--:--
   ------------------------------- ------ 524.3/624.3 kB 656.5 kB/s eta 0:00:01
   -------------------------------------- 624.3/624.3 kB 584.6 kB/s eta 0:00:00
Installing collected packages: textblob
Successfully installed textblob-0.19.0
In [9]:
import nltk
nltk.download('punkt')
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\hp\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
Out[9]:
True
In [11]:
print(df.columns.tolist())
['Package_Name', 'App Version Code', 'App Version Name', 'Reviewer Language', 'Device', 'Review Submit Date and Time', 'Review Submit Millis Since Epoch', 'Review Last Update Date and Time', 'Review Last Update Millis Since Epoch', 'Star Rating', 'Review Title', 'Review Text', 'Developer Reply Date and Time', 'Developer Reply Millis Since Epoch', 'Developer Reply Text', 'Review Link', 'Edit_Gap_Days']
In [13]:
df['Rating'] = pd.to_numeric(df['Star Rating'], errors ='coerce')
In [15]:
df.rename(columns={'Star Rating': 'Rating'}, inplace=True)
df['Rating'] = pd.to_numeric(df['Rating'], errors='coerce')
df = df[df['Rating'].between(1, 5)]
In [17]:
df.rename(columns={'Star Rating': 'Rating'}, inplace=True)
In [19]:
df.head()
Out[19]:
Package_Name App Version Code App Version Name Reviewer Language Device Review Submit Date and Time Review Submit Millis Since Epoch Review Last Update Date and Time Review Last Update Millis Since Epoch Star Rating Review Title Review Text Developer Reply Date and Time Developer Reply Millis Since Epoch Developer Reply Text Review Link Edit_Gap_Days
0 com.app.grow.greener.plants 4.0 1.3 ru RE588E 2024-07-02 12:46:52+00:00 1.719924e+12 2024-07-02 12:46:52+00:00 1.719924e+12 1.0 NaN NaN NaN NaN NaN NaN 0
1 com.app.grow.greener.plants NaN NaN ru RMX1993L1 2024-07-02 14:45:11+00:00 1.719932e+12 2024-07-02 14:45:11+00:00 1.719932e+12 1.0 NaN 5 @01>B05B. NaN NaN NaN http://play.google.com/console/developers/5478... 0
2 com.app.grow.greener.plants 13.0 2.2 ru TECNO-LH6n 2024-07-04 05:01:01+00:00 1.720069e+12 2024-07-20 19:32:56+00:00 1.721504e+12 5.0 NaN @8;>65=85 AC?5@ 8=B5@D59A @04C5B 8 70@01>B>: B... NaN NaN NaN http://play.google.com/console/developers/5478... 16
3 com.app.grow.greener.plants 4.0 1.3 es a04e 2024-07-05 18:21:35+00:00 1.720204e+12 2024-07-05 18:21:35+00:00 1.720204e+12 5.0 NaN NaN NaN NaN NaN NaN 0
4 com.app.grow.greener.plants NaN NaN pt mdh15lm 2024-07-08 23:59:48+00:00 1.720483e+12 2024-07-08 23:59:48+00:00 1.720483e+12 5.0 NaN NaN NaN NaN NaN NaN 0
In [21]:
import pandas as pd
import os

# Path to your CSV or Excel file
file_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\cleaned_Merged_Output.csv"

# Read file (auto-detect format)
if file_path.endswith('.csv'):
    df = pd.read_csv(file_path)
else:
    df = pd.read_excel(file_path, engine='openpyxl')

# ✅ STEP 1: Clean column names (remove spaces and normalize for matching)
df.columns = df.columns.str.strip()   # Remove spaces around names

# ✅ STEP 2: Check for 'Rating' or 'Star Rating' column
rating_col = None
for col in df.columns:
    if col.lower() in ['rating', 'star rating', 'star_rating']:
        rating_col = col
        break

if rating_col is None:
    raise KeyError("No column found matching 'Rating' or 'Star Rating'. Please verify your dataset.")

print(f"Using column: {rating_col}")

# ✅ STEP 3: Clean and filter ratings
df[rating_col] = pd.to_numeric(df[rating_col], errors='coerce')  # Convert to numeric
df = df[df[rating_col].between(1, 5)]  # Keep only valid ratings 1–5

# ✅ STEP 4: Fill NaNs for other columns if needed
df = df.fillna('')

# ✅ STEP 5: Export cleaned file
output_path = os.path.splitext(file_path)[0] + "_cleaned.csv"
df.to_csv(output_path, index=False)
print(f"\n✅ Cleaned file saved successfully as: {output_path}")
Using column: Star Rating

✅ Cleaned file saved successfully as: C:\Users\hp\Desktop\WEEK 3_DSHub\cleaned_Merged_Output_cleaned.csv
In [ ]:
#SENTIMENT ANALYSIS
In [25]:
# ==============================
# STEP 1: Import Dependencies
# ==============================
import pandas as pd
import matplotlib.pyplot as plt
from textblob import TextBlob

# ==============================
# STEP 2: Load Your CSV File
# ==============================
file_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\cleaned_Merged_Output_cleaned.csv"  # <-- update path if needed
df = pd.read_csv(file_path, encoding='utf-8', low_memory=False)

print("✅ Data loaded successfully!")
print("Shape:", df.shape)
print("\nColumns:", list(df.columns))
print("\nPreview:\n", df.head())

# ==============================
# STEP 3: Rename Columns (Standardize Names)
# ==============================
df.rename(columns={
    'Star Rating': 'Rating',
    'Review Submit Date and Time': 'Submit_Date',
    'Review Last Update Date and Time': 'Last_Update_Date'
}, inplace=True)

# ==============================
# STEP 4: Clean & Convert Date Columns
# ==============================
# Convert messy date strings into proper datetime format
for col in ['Submit_Date', 'Last_Update_Date']:
    df[col] = pd.to_datetime(df[col], errors='coerce')

# Drop rows where both dates are missing
df = df.dropna(subset=['Submit_Date', 'Last_Update_Date'], how='all')

# ==============================
# STEP 5: Calculate Time Difference (Editing Gap)
# ==============================
df['Edit_Gap_Days'] = (df['Last_Update_Date'] - df['Submit_Date']).dt.days
df['Edit_Gap_Days'] = df['Edit_Gap_Days'].fillna(0)

# ==============================
# STEP 6: Clean & Normalize Rating Column
# ==============================
df['Rating'] = pd.to_numeric(df['Rating'], errors='coerce')
df = df[df['Rating'].between(1, 5)]  # Keep only valid ratings (1–5)

# ==============================
# STEP 7: Sentiment Analysis (Optional)
# ==============================
if 'Review Text' in df.columns:
    df['Sentiment'] = df['Review Text'].astype(str).apply(lambda x: TextBlob(x).sentiment.polarity)
else:
    print("⚠️ 'Review Text' column not found — skipping sentiment analysis.")

# ==============================
# STEP 8: Basic Descriptive Statistics
# ==============================
print("\n===== Rating Summary =====")
print(df)
✅ Data loaded successfully!
Shape: (995, 16)

Columns: ['Package_Name', 'App Version Code', 'App Version Name', 'Reviewer Language', 'Device', 'Review Submit Date and Time', 'Review Submit Millis Since Epoch', 'Review Last Update Date and Time', 'Review Last Update Millis Since Epoch', 'Star Rating', 'Review Title', 'Review Text', 'Developer Reply Date and Time', 'Developer Reply Millis Since Epoch', 'Developer Reply Text', 'Review Link']

Preview:
                   Package_Name  App Version Code  App Version Name  \
0  com.app.grow.greener.plants               4.0               1.3   
1  com.app.grow.greener.plants               NaN               NaN   
2  com.app.grow.greener.plants               4.0               1.3   
3  com.app.grow.greener.plants               NaN               NaN   
4  com.app.grow.greener.plants              13.0               2.2   

  Reviewer Language     Device Review Submit Date and Time  \
0                ru     RE588E        2024-07-02T12:46:52Z   
1                ru  RMX1993L1        2024-07-02T14:45:11Z   
2                es       a04e        2024-07-05T18:21:35Z   
3                pt    mdh15lm        2024-07-08T23:59:48Z   
4                pt        a10        2024-07-15T15:58:01Z   

   Review Submit Millis Since Epoch Review Last Update Date and Time  \
0                      1.719924e+12             2024-07-02T12:46:52Z   
1                      1.719932e+12             2024-07-02T14:45:11Z   
2                      1.720204e+12             2024-07-05T18:21:35Z   
3                      1.720483e+12             2024-07-08T23:59:48Z   
4                      1.721059e+12             2024-07-15T15:58:09Z   

   Review Last Update Millis Since Epoch  Star Rating  Review Title  \
0                           1.719924e+12          1.0           NaN   
1                           1.719932e+12          1.0           NaN   
2                           1.720204e+12          5.0           NaN   
3                           1.720483e+12          5.0           NaN   
4                           1.721059e+12          5.0           NaN   

        Review Text Developer Reply Date and Time  \
0               NaN                           NaN   
1       5 @01>B05B.                           NaN   
2               NaN                           NaN   
3               NaN                           NaN   
4  Ótimo aplicativo                           NaN   

  Developer Reply Millis Since Epoch Developer Reply Text  \
0                                NaN                  NaN   
1                                NaN                  NaN   
2                                NaN                  NaN   
3                                NaN                  NaN   
4                                NaN                  NaN   

                                         Review Link  
0                                                NaN  
1  http://play.google.com/console/developers/5478...  
2                                                NaN  
3                                                NaN  
4  http://play.google.com/console/developers/5478...  

===== Rating Summary =====
                    Package_Name  App Version Code  App Version Name  \
0    com.app.grow.greener.plants               4.0               1.3   
1    com.app.grow.greener.plants               NaN               NaN   
2    com.app.grow.greener.plants               4.0               1.3   
3    com.app.grow.greener.plants               NaN               NaN   
4    com.app.grow.greener.plants              13.0               2.2   
..                           ...               ...               ...   
990  com.app.grow.greener.plants              13.0               2.2   
991  com.app.grow.greener.plants              13.0               2.2   
992  com.app.grow.greener.plants              13.0               2.2   
993  com.app.grow.greener.plants              13.0               2.2   
994  com.app.grow.greener.plants              13.0               2.2   

    Reviewer Language         Device               Submit_Date  \
0                  ru         RE588E 2024-07-02 12:46:52+00:00   
1                  ru      RMX1993L1 2024-07-02 14:45:11+00:00   
2                  es           a04e 2024-07-05 18:21:35+00:00   
3                  pt        mdh15lm 2024-07-08 23:59:48+00:00   
4                  pt            a10 2024-07-15 15:58:01+00:00   
..                ...            ...                       ...   
990                bg            a50 2024-07-21 13:15:33+00:00   
991                id       star2lte 2024-07-21 13:16:19+00:00   
992                ru  Infinix-X6516 2024-07-21 13:21:56+00:00   
993                id          cloud 2024-07-21 13:26:27+00:00   
994                hu          vicky 2024-07-21 13:29:41+00:00   

     Review Submit Millis Since Epoch          Last_Update_Date  \
0                        1.719924e+12 2024-07-02 12:46:52+00:00   
1                        1.719932e+12 2024-07-02 14:45:11+00:00   
2                        1.720204e+12 2024-07-05 18:21:35+00:00   
3                        1.720483e+12 2024-07-08 23:59:48+00:00   
4                        1.721059e+12 2024-07-15 15:58:09+00:00   
..                                ...                       ...   
990                      1.721568e+12 2024-07-21 13:15:33+00:00   
991                      1.721568e+12 2024-07-21 13:16:19+00:00   
992                      1.721568e+12 2024-07-21 13:21:56+00:00   
993                      1.721568e+12 2024-07-21 13:26:27+00:00   
994                      1.721569e+12 2024-07-21 13:29:41+00:00   

     Review Last Update Millis Since Epoch  Rating  Review Title  \
0                             1.719924e+12     1.0           NaN   
1                             1.719932e+12     1.0           NaN   
2                             1.720204e+12     5.0           NaN   
3                             1.720483e+12     5.0           NaN   
4                             1.721059e+12     5.0           NaN   
..                                     ...     ...           ...   
990                           1.721568e+12     5.0           NaN   
991                           1.721568e+12     5.0           NaN   
992                           1.721568e+12     5.0           NaN   
993                           1.721568e+12     5.0           NaN   
994                           1.721569e+12     3.0           NaN   

                                           Review Text  \
0                                                  NaN   
1                                          5 @01>B05B.   
2                                                  NaN   
3                                                  NaN   
4                                     Ótimo aplicativo   
..                                                 ...   
990                                                NaN   
991  Semoga terbuki membayar dan sukses terus game nya   
992                                                NaN   
993                                      Saya coba duu   
994  Még csak ma kezdtem. Meglátom hogy ér-e öt csi...   

    Developer Reply Date and Time Developer Reply Millis Since Epoch  \
0                             NaN                                NaN   
1                             NaN                                NaN   
2                             NaN                                NaN   
3                             NaN                                NaN   
4                             NaN                                NaN   
..                            ...                                ...   
990                           NaN                                NaN   
991                           NaN                                NaN   
992                           NaN                                NaN   
993                           NaN                                NaN   
994                           NaN                                NaN   

    Developer Reply Text                                        Review Link  \
0                    NaN                                                NaN   
1                    NaN  http://play.google.com/console/developers/5478...   
2                    NaN                                                NaN   
3                    NaN                                                NaN   
4                    NaN  http://play.google.com/console/developers/5478...   
..                   ...                                                ...   
990                  NaN                                                NaN   
991                  NaN  http://play.google.com/console/developers/5478...   
992                  NaN                                                NaN   
993                  NaN  http://play.google.com/console/developers/5478...   
994                  NaN  http://play.google.com/console/developers/5478...   

     Edit_Gap_Days  Sentiment  
0                0        0.0  
1                0        0.0  
2                0        0.0  
3                0        0.0  
4                0        0.0  
..             ...        ...  
990              0        0.0  
991              0       -0.4  
992              0        0.0  
993              0        0.0  
994              0        0.0  

[995 rows x 18 columns]
In [30]:
import pandas as pd

# ==============================
# STEP 1: Load the existing cleaned file
# ==============================
file_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\cleaned_Merged_Output_cleaned.csv"
df = pd.read_csv(file_path, encoding='utf-8', low_memory=False)

print("✅ File loaded successfully.")
print("Before cleaning:", df.shape)

# ==============================
# STEP 2: Drop unwanted columns
# ==============================
cols_to_drop = [
    "Review Last Update Millis Since Epoch",
    "Developer Reply Date and Time",
    "Developer Reply Millis Since Epoch",
    "Review Title",
    "Developer Reply Text"
]

# Drop columns if they exist
df = df.drop(columns=[c for c in cols_to_drop if c in df.columns], errors='ignore')

# ==============================
# STEP 3: Remove rows with missing key values
# ==============================
required_cols = ["Star Rating", "Review Submit Date and Time", "Review Last Update Date and Time"]

# Drop rows that have NaN in any of these important columns
df = df.dropna(subset=required_cols, how='any')

print("After cleaning:", df.shape)

# ==============================
# STEP 4: Export the cleaned dataset
# ==============================
output_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\Final_Review_Analysis_Cleaned.csv"
df.to_csv(output_path, index=False, encoding='utf-8')

print(f"✅ Cleaning complete! File saved at:\n{output_path}")
✅ File loaded successfully.
Before cleaning: (995, 16)
After cleaning: (995, 11)
✅ Cleaning complete! File saved at:
C:\Users\hp\Desktop\WEEK 3_DSHub\Final_Review_Analysis_Cleaned.csv
In [28]:
print(df.columns.tolist())
['Package_Name', 'App Version Code', 'App Version Name', 'Reviewer Language', 'Device', 'Review Submit Date and Time', 'Review Submit Millis Since Epoch', 'Review Last Update Date and Time', 'Star Rating', 'Review Text', 'Review Link']
In [32]:
df.head()
Out[32]:
Package_Name App Version Code App Version Name Reviewer Language Device Review Submit Date and Time Review Submit Millis Since Epoch Review Last Update Date and Time Star Rating Review Text Review Link
0 com.app.grow.greener.plants 4.0 1.3 ru RE588E 2024-07-02T12:46:52Z 1.719924e+12 2024-07-02T12:46:52Z 1.0 NaN NaN
1 com.app.grow.greener.plants NaN NaN ru RMX1993L1 2024-07-02T14:45:11Z 1.719932e+12 2024-07-02T14:45:11Z 1.0 5 @01>B05B. http://play.google.com/console/developers/5478...
2 com.app.grow.greener.plants 4.0 1.3 es a04e 2024-07-05T18:21:35Z 1.720204e+12 2024-07-05T18:21:35Z 5.0 NaN NaN
3 com.app.grow.greener.plants NaN NaN pt mdh15lm 2024-07-08T23:59:48Z 1.720483e+12 2024-07-08T23:59:48Z 5.0 NaN NaN
4 com.app.grow.greener.plants 13.0 2.2 pt a10 2024-07-15T15:58:01Z 1.721059e+12 2024-07-15T15:58:09Z 5.0 Ótimo aplicativo http://play.google.com/console/developers/5478...
In [34]:
# ==============================
# STEP 1: Import Dependencies
# ==============================
import pandas as pd
import matplotlib.pyplot as plt
from textblob import TextBlob

# ==============================
# STEP 2: Load Your CSV File
# ==============================
file_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\cleaned_Merged_Output_cleaned.csv"  # update if needed
df = pd.read_csv(file_path, encoding='utf-8', low_memory=False)

print("✅ Data loaded successfully!")
print("Shape:", df.shape)
print("\nColumns:", list(df.columns))
print("\nPreview:\n", df.head())

# ==============================
# STEP 4: Clean & Convert Date Columns
# ==============================
for col in ['Review Submit Date and Time', 'Review Last Update Date and Time']:
    df[col] = pd.to_datetime(df[col], errors='coerce')

# Drop rows where both dates are missing
df = df.dropna(subset=['Review Submit Date and Time', 'Review Last Update Date and Time'], how='all')

# ==============================
# STEP 5: Calculate Time Difference (Editing Gap)
# ==============================
df['Edit_Gap_Days'] = (df['Review Last Update Date and Time'] - df['Review Submit Date and Time']).dt.days
df['Edit_Gap_Days'] = df['Edit_Gap_Days'].fillna(0)

# ==============================
# STEP 6: Clean & Normalize Rating Column
# ==============================
df['Star Rating'] = pd.to_numeric(df['Star Rating'], errors='coerce')
df = df[df['Star Rating'].between(1, 5)]


# ==============================
# STEP 8: Basic Descriptive Statistics
# ==============================
print("\n===== Rating Summary =====")
print(df['Star Rating'].describe())

print("\n===== Edit Gap Summary =====")
print(df['Edit_Gap_Days'].describe())

# ==============================
# STEP 9: Visualize Key Insights
# ==============================
# --- Ratings Distribution ---
plt.figure(figsize=(7,4))
df['Star Rating'].value_counts().sort_index().plot(kind='bar', color='skyblue')
plt.title('Distribution of Ratings')
plt.xlabel('Rating')
plt.ylabel('Count')
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()

# --- Average Edit Gap by Rating ---
plt.figure(figsize=(7,4))
df.groupby('Star Rating')['Edit_Gap_Days'].mean().plot(kind='bar', color='orange')
plt.title('Average Edit Gap by Rating')
plt.xlabel('Star Rating')
plt.ylabel('Average Edit Gap (Days)')
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()

# ==============================
# STEP 10: Export Cleaned Dataset
# ==============================
output_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\Final_Review_Analysis.csv"
df.to_csv(output_path, index=False, encoding='utf-8')
print(f"\n✅ Cleaned and analyzed data saved successfully at:\n{output_path}")
✅ Data loaded successfully!
Shape: (995, 16)

Columns: ['Package_Name', 'App Version Code', 'App Version Name', 'Reviewer Language', 'Device', 'Review Submit Date and Time', 'Review Submit Millis Since Epoch', 'Review Last Update Date and Time', 'Review Last Update Millis Since Epoch', 'Star Rating', 'Review Title', 'Review Text', 'Developer Reply Date and Time', 'Developer Reply Millis Since Epoch', 'Developer Reply Text', 'Review Link']

Preview:
                   Package_Name  App Version Code  App Version Name  \
0  com.app.grow.greener.plants               4.0               1.3   
1  com.app.grow.greener.plants               NaN               NaN   
2  com.app.grow.greener.plants               4.0               1.3   
3  com.app.grow.greener.plants               NaN               NaN   
4  com.app.grow.greener.plants              13.0               2.2   

  Reviewer Language     Device Review Submit Date and Time  \
0                ru     RE588E        2024-07-02T12:46:52Z   
1                ru  RMX1993L1        2024-07-02T14:45:11Z   
2                es       a04e        2024-07-05T18:21:35Z   
3                pt    mdh15lm        2024-07-08T23:59:48Z   
4                pt        a10        2024-07-15T15:58:01Z   

   Review Submit Millis Since Epoch Review Last Update Date and Time  \
0                      1.719924e+12             2024-07-02T12:46:52Z   
1                      1.719932e+12             2024-07-02T14:45:11Z   
2                      1.720204e+12             2024-07-05T18:21:35Z   
3                      1.720483e+12             2024-07-08T23:59:48Z   
4                      1.721059e+12             2024-07-15T15:58:09Z   

   Review Last Update Millis Since Epoch  Star Rating  Review Title  \
0                           1.719924e+12          1.0           NaN   
1                           1.719932e+12          1.0           NaN   
2                           1.720204e+12          5.0           NaN   
3                           1.720483e+12          5.0           NaN   
4                           1.721059e+12          5.0           NaN   

        Review Text Developer Reply Date and Time  \
0               NaN                           NaN   
1       5 @01>B05B.                           NaN   
2               NaN                           NaN   
3               NaN                           NaN   
4  Ótimo aplicativo                           NaN   

  Developer Reply Millis Since Epoch Developer Reply Text  \
0                                NaN                  NaN   
1                                NaN                  NaN   
2                                NaN                  NaN   
3                                NaN                  NaN   
4                                NaN                  NaN   

                                         Review Link  
0                                                NaN  
1  http://play.google.com/console/developers/5478...  
2                                                NaN  
3                                                NaN  
4  http://play.google.com/console/developers/5478...  

===== Rating Summary =====
count    995.000000
mean       4.558794
std        0.937327
min        1.000000
25%        5.000000
50%        5.000000
75%        5.000000
max        5.000000
Name: Star Rating, dtype: float64

===== Edit Gap Summary =====
count    995.000000
mean       0.019095
std        0.515979
min        0.000000
25%        0.000000
50%        0.000000
75%        0.000000
max       16.000000
Name: Edit_Gap_Days, dtype: float64
No description has been provided for this image
No description has been provided for this image
✅ Cleaned and analyzed data saved successfully at:
C:\Users\hp\Desktop\WEEK 3_DSHub\Final_Review_Analysis.csv
In [35]:
df.head()
Out[35]:
Package_Name App Version Code App Version Name Reviewer Language Device Review Submit Date and Time Review Submit Millis Since Epoch Review Last Update Date and Time Review Last Update Millis Since Epoch Star Rating Review Title Review Text Developer Reply Date and Time Developer Reply Millis Since Epoch Developer Reply Text Review Link Edit_Gap_Days
0 com.app.grow.greener.plants 4.0 1.3 ru RE588E 2024-07-02 12:46:52+00:00 1.719924e+12 2024-07-02 12:46:52+00:00 1.719924e+12 1.0 NaN NaN NaN NaN NaN NaN 0
1 com.app.grow.greener.plants NaN NaN ru RMX1993L1 2024-07-02 14:45:11+00:00 1.719932e+12 2024-07-02 14:45:11+00:00 1.719932e+12 1.0 NaN 5 @01>B05B. NaN NaN NaN http://play.google.com/console/developers/5478... 0
2 com.app.grow.greener.plants 4.0 1.3 es a04e 2024-07-05 18:21:35+00:00 1.720204e+12 2024-07-05 18:21:35+00:00 1.720204e+12 5.0 NaN NaN NaN NaN NaN NaN 0
3 com.app.grow.greener.plants NaN NaN pt mdh15lm 2024-07-08 23:59:48+00:00 1.720483e+12 2024-07-08 23:59:48+00:00 1.720483e+12 5.0 NaN NaN NaN NaN NaN NaN 0
4 com.app.grow.greener.plants 13.0 2.2 pt a10 2024-07-15 15:58:01+00:00 1.721059e+12 2024-07-15 15:58:09+00:00 1.721059e+12 5.0 NaN Ótimo aplicativo NaN NaN NaN http://play.google.com/console/developers/5478... 0

User Retention & Chun

In [37]:
#returning VS one-time users
user_activity = df.groupby('Device').size().reset_index(name='Review_Count')
user_activity
Out[37]:
Device Review_Count
0 1807 1
1 1808 1
2 1816 1
3 1820 2
4 1901 1
... ... ...
470 vince 2
471 viva 1
472 water 1
473 willow 2
474 zircon 1

475 rows × 2 columns

Users with Review_Count > 1 → retained users

Users with Review_Count == 1 → churned users (one-time reviewers)

In [ ]:
#Count and Tabulate Retained vs One-Time User
In [38]:
import pandas as pd

# Load your cleaned CSV
file_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\Final_Review_Analysis_Cleaned.csv"
df = pd.read_csv(file_path, encoding='utf-8', low_memory=False)

# Ensure the key column exists
if 'Device' not in df.columns:
    raise ValueError("❌ 'Device' column not found in dataset!")

# Count number of reviews per device (user)
user_activity = df.groupby('Device').size().reset_index(name='Review_Count')

# Classify users
user_activity['User_Type'] = user_activity['Review_Count'].apply(
    lambda x: 'Retained (Reviewed > 1 time)' if x > 1 else 'One-time (Churned)'
)

# Tabulate summary
summary = user_activity['User_Type'].value_counts().reset_index()
summary.columns = ['User_Type', 'User_Count']

# Add retention rate
total_users = summary['User_Count'].sum()
summary['Percentage'] = (summary['User_Count'] / total_users * 100).round(2)

print("===== USER RETENTION & CHURN SUMMARY =====")
print(summary)

# Optional: Export to CSV
output_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\User_Retention_Summary.csv"
summary.to_csv(output_path, index=False)
print(f"\n✅ Summary saved to: {output_path}")
===== USER RETENTION & CHURN SUMMARY =====
                      User_Type  User_Count  Percentage
0            One-time (Churned)         263       55.37
1  Retained (Reviewed > 1 time)         212       44.63

✅ Summary saved to: C:\Users\hp\Desktop\WEEK 3_DSHub\User_Retention_Summary.csv
In [39]:
import pandas as pd
import matplotlib.pyplot as plt

# ==============================
# STEP 1: Load Your Cleaned CSV
# ==============================
file_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\Final_Review_Analysis_Cleaned.csv"
df = pd.read_csv(file_path, encoding='utf-8', low_memory=False)

# ==============================
# STEP 2: Check for 'Device' Column
# ==============================
if 'Device' not in df.columns:
    raise ValueError("❌ 'Device' column not found in dataset!")

# ==============================
# STEP 3: Count Reviews per User (Device)
# ==============================
user_activity = df.groupby('Device').size().reset_index(name='Review_Count')

# ==============================
# STEP 4: Classify Users
# ==============================
user_activity['User_Type'] = user_activity['Review_Count'].apply(
    lambda x: 'Retained (Reviewed > 1 time)' if x > 1 else 'One-time (Churned)'
)

# ==============================
# STEP 5: Tabulate Summary
# ==============================
summary = user_activity['User_Type'].value_counts().reset_index()
summary.columns = ['User_Type', 'User_Count']

# Add retention percentage
total_users = summary['User_Count'].sum()
summary['Percentage'] = (summary['User_Count'] / total_users * 100).round(2)

print("===== USER RETENTION & CHURN SUMMARY =====")
print(summary)

# ==============================
# STEP 6: Visualization
# ==============================

# --- Bar Chart ---
plt.figure(figsize=(7,4))
plt.bar(summary['User_Type'], summary['User_Count'], color=['green', 'red'])
plt.title('User Retention vs Churn')
plt.xlabel('User Type')
plt.ylabel('User Count')
plt.grid(axis='y', linestyle='--', alpha=0.7)
for i, v in enumerate(summary['User_Count']):
    plt.text(i, v + (v * 0.01), str(v), ha='center', fontweight='bold')
plt.show()

# --- Pie Chart ---
plt.figure(figsize=(5,5))
plt.pie(summary['User_Count'], labels=summary['User_Type'], autopct='%1.1f%%', 
        colors=['green', 'red'], startangle=90, explode=(0.05, 0))
plt.title('User Retention vs Churn (Percentage)')
plt.show()

# ==============================
# STEP 7: Export Summary
# ==============================
output_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\User_Retention_Summary.csv"
summary.to_csv(output_path, index=False)
print(f"\n✅ Summary and charts generated successfully! File saved to:\n{output_path}")
===== USER RETENTION & CHURN SUMMARY =====
                      User_Type  User_Count  Percentage
0            One-time (Churned)         263       55.37
1  Retained (Reviewed > 1 time)         212       44.63
No description has been provided for this image
No description has been provided for this image
✅ Summary and charts generated successfully! File saved to:
C:\Users\hp\Desktop\WEEK 3_DSHub\User_Retention_Summary.csv
In [40]:
# Save as PNG
plt.savefig("sample_plot.png", dpi=300, bbox_inches='tight')

# Save as PDF
plt.savefig("sample_plot.pdf", bbox_inches='tight')
<Figure size 640x480 with 0 Axes>
In [ ]:
#Install & Install Trends
In [42]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

sns.set(style="whitegrid")  # prettier plots
In [71]:
# Replace paths with your actual CSV file locations
csv_files = [
    r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\install\install_july.csv",
    r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\install_Aug.csv",
    r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\install_sept.csv",
    r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\install_oct.csv",
    r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\install_nov.csv",
    r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\install_dec.csv"
]
In [64]:
cols_to_keep = ['Date', 'Package Name', 'Daily Device Installs', 'Daily Device Uninstalls', 'Daily Device Upgrades', 'Active Device Installs']
In [76]:
import pandas as pd
import glob

# Folder containing your monthly CSVs (July–December)
path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\install"

# Collect all CSVs in that folder
files = glob.glob(path + "/*.csv")

# Target columns to keep
cols_to_keep = [
    "Date", "Package Name", "Daily Device Installs",
    "Daily Device Uninstalls", "Total User Installs"
]

merged_data = []

for file in files:
    try:
        temp = pd.read_csv(file, encoding='utf-8', low_memory=False)
    except UnicodeDecodeError:
        temp = pd.read_csv(file, encoding='latin1', low_memory=False)
    
    # Normalize column names
    temp.columns = temp.columns.str.strip().str.lower()
    
    # Map possible column variations
    rename_map = {
        'package name': 'Package Name',
        'package_name': 'Package Name',
        'date': 'Date',
        'daily device installs': 'Daily Device Installs',
        'daily_device_installs': 'Daily Device Installs',
        'daily device uninstalls': 'Daily Device Uninstalls',
        'daily_device_uninstalls': 'Daily Device Uninstalls',
        'total user installs': 'Total User Installs',
        'total_user_installs': 'Total User Installs'
    }
    temp.rename(columns=rename_map, inplace=True)

    # Keep only required columns
    available_cols = [col for col in cols_to_keep if col in temp.columns]
    temp = temp[available_cols]
    
    # Add a column to track which month file this came from
    temp['Source_File'] = file.split("\\")[-1]
    
    merged_data.append(temp)

# Merge all months
df = pd.concat(merged_data, ignore_index=True)

# Clean date column
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

# Sort by date
df = df.sort_values(by='Date')

print("✅ Data merged successfully!")
print("Shape:", df.shape)
print("\nColumns:", df.columns.tolist())
print("\nMonths included:")
print(df['Source_File'].unique())

# Save cleaned merged file
output_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged_Install_Uninstall_Data.csv"
df.to_csv(output_path, index=False, encoding='utf-8')
print(f"\n✅ Cleaned and merged data saved to:\n{output_path}")
✅ Data merged successfully!
Shape: (8776, 6)

Columns: ['Date', 'Package Name', 'Daily Device Installs', 'Daily Device Uninstalls', 'Total User Installs', 'Source_File']

Months included:
['install_Aug.csv' 'install_july.csv' 'install_sept.csv' 'install_oct.csv'
 'install_nov.csv' 'install_dec.csv']

✅ Cleaned and merged data saved to:
C:\Users\hp\Desktop\WEEK 3_DSHub\Merged_Install_Uninstall_Data.csv
In [66]:
import pandas as pd
import glob

folder_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\install.csv"
files = glob.glob(folder_path + "/*.csv")

for file in files:
    df = pd.read_csv(file, encoding='utf-8', low_memory=False)
    print(f"\n📂 {file}")
    print(df.columns.tolist())
In [78]:
#verify all months are included

# Extract unique months and their record counts
merged_df["Month"] = merged_df["Date"].dt.month_name()
month_summary = merged_df["Month"].value_counts().sort_index()

print("\n===== MONTHLY DATA COVERAGE =====")
print(month_summary)
print("\n📆 Unique months found:", merged_df["Month"].unique())
===== MONTHLY DATA COVERAGE =====
Month
August       124
December     486
July        2482
November      30
October       31
Name: count, dtype: int64

📆 Unique months found: ['August' 'December' 'July' 'November' 'October']
In [139]:
import os

folder_path =  r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\install"  # adjust this
files = [f for f in os.listdir(folder_path) if f.endswith(".csv")]

print("All CSV files found:\n", files)
All CSV files found:
 ['install_Aug.csv', 'install_dec.csv', 'install_july.csv', 'install_nov.csv', 'install_oct.csv', 'install_sept.csv']
In [116]:
pd.read_csv(r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged_Monthly_Data_Cleaned.csv", encoding='utf-8', low_memory=False)
Out[116]:
Date Package name Carrier Daily Device Installs Daily Device Uninstalls Daily Device Upgrades Total User Installs Daily User Installs Daily User Uninstalls Active Device Installs ... Uninstall events Month Android OS Version Country Package Name Daily Crashes Daily ANRs App Version Code Source_File Country_Full
0 7/2/2024 com.app.grow.greener.plants NaN 6.0 0.0 0.0 0.0 4.0 3.0 3.0 ... 4.0 Install_july NaN BG NaN NaN NaN NaN NaN Bulgaria
1 7/2/2024 com.app.grow.greener.plants NaN 1.0 0.0 0.0 0.0 1.0 0.0 1.0 ... 0.0 Install_july NaN BR NaN NaN NaN NaN NaN Brazil
2 7/2/2024 com.app.grow.greener.plants NaN 4.0 0.0 0.0 0.0 3.0 9.0 2.0 ... 9.0 Install_july NaN DE NaN NaN NaN NaN NaN Germany
3 7/2/2024 com.app.grow.greener.plants NaN 2.0 0.0 0.0 0.0 1.0 0.0 1.0 ... 0.0 Install_july NaN HU NaN NaN NaN NaN NaN Hungary
4 7/2/2024 com.app.grow.greener.plants NaN 1.0 0.0 0.0 0.0 1.0 0.0 1.0 ... 0.0 Install_july NaN MY NaN NaN NaN NaN NaN Malaysia
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2470 7/31/2024 com.app.grow.greener.plants NaN 0.0 0.0 0.0 0.0 0.0 0.0 1.0 ... 0.0 Install_july NaN XK NaN NaN NaN NaN NaN XK
2471 7/31/2024 com.app.grow.greener.plants NaN 0.0 0.0 0.0 0.0 0.0 0.0 1.0 ... 0.0 Install_july NaN YE NaN NaN NaN NaN NaN Yemen
2472 7/31/2024 com.app.grow.greener.plants NaN 2.0 0.0 0.0 0.0 2.0 3.0 27.0 ... 4.0 Install_july NaN ZA NaN NaN NaN NaN NaN South Africa
2473 7/31/2024 com.app.grow.greener.plants NaN 0.0 0.0 0.0 0.0 0.0 0.0 2.0 ... 0.0 Install_july NaN ZM NaN NaN NaN NaN NaN Zambia
2474 7/31/2024 com.app.grow.greener.plants NaN 6.0 0.0 0.0 0.0 8.0 5.0 27.0 ... 5.0 Install_july NaN ZZ NaN NaN NaN NaN NaN ZZ

2475 rows × 22 columns

In [9]:
print("===== MONTHLY DATA COVERAGE =====")
print(merged_df['Month'].value_counts())
print("\n📆 Unique months found:", merged_df['Month'].unique())
===== MONTHLY DATA COVERAGE =====
Month
Install_aug     5685
Install_july    2482
Install_dec      486
Install_sept      62
Install_oct       31
Install_nov       30
Name: count, dtype: int64

📆 Unique months found: ['Install_aug' 'Install_dec' 'Install_july' 'Install_nov' 'Install_oct'
 'Install_sept']
In [8]:
import pandas as pd
import glob

path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\install"
files = glob.glob(path + "/*.csv")

merged_data = []

for file in files:
    df = pd.read_csv(file, encoding='latin1', low_memory=False)
    
    # Optional: Extract month from filename
    month_name = os.path.basename(file).split('.')[0]  # Example: "September"
    df['Month'] = month_name.capitalize()
    
    merged_data.append(df)

# Combine all
merged_df = pd.concat(merged_data, ignore_index=True)
In [7]:
for file in files:
    df = pd.read_csv(file, encoding='latin1', low_memory=False)
    print(os.path.basename(file), "→", len(df), "rows")
install_Aug.csv → 5685 rows
install_dec.csv → 486 rows
install_july.csv → 2482 rows
install_nov.csv → 30 rows
install_oct.csv → 31 rows
install_sept.csv → 62 rows
In [87]:
print("Total merged rows:", len(merged_df))
Total merged rows: 17552
In [10]:
import os
import pandas as pd
import glob

# 🔹 Set your folder path
path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\install" # change this to your actual folder

# 🔹 Find all CSV files in the folder
files = glob.glob(os.path.join(path, "*.csv"))

# 🔹 Print all detected files with row counts
print("===== FILES INCLUDED IN MERGE =====")
for file in files:
    try:
        temp = pd.read_csv(file, encoding='latin1', low_memory=False)
        print(f"{os.path.basename(file):<30} | Rows: {len(temp)}")
    except Exception as e:
        print(f"{os.path.basename(file):<30} | ⚠️ Error reading file: {e}")

# 🔹 Check the unique months in the final merged dataframe (if you already merged)
print("\n===== MONTHS PRESENT IN MERGED DATA =====")
print(merged_df['Month'].value_counts())
print("\n📆 Unique months found:", merged_df['Month'].unique())
===== FILES INCLUDED IN MERGE =====
install_Aug.csv                | Rows: 5685
install_dec.csv                | Rows: 486
install_july.csv               | Rows: 2482
install_nov.csv                | Rows: 30
install_oct.csv                | Rows: 31
install_sept.csv               | Rows: 62

===== MONTHS PRESENT IN MERGED DATA =====
Month
Install_aug     5685
Install_july    2482
Install_dec      486
Install_sept      62
Install_oct       31
Install_nov       30
Name: count, dtype: int64

📆 Unique months found: ['Install_aug' 'Install_dec' 'Install_july' 'Install_nov' 'Install_oct'
 'Install_sept']
In [89]:
# 🔹 Set your output path (update to your folder)
output_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged_Monthly_Data.csv"

# 🔹 Save the merged dataframe
merged_df.to_csv(output_path, index=False, encoding='utf-8')

print(f"✅ Merged CSV saved successfully at:\n{output_path}")
✅ Merged CSV saved successfully at:
C:\Users\hp\Desktop\WEEK 3_DSHub\Merged_Monthly_Data.csv

Monthly Trend Analysis of Install& Uninstall

In [5]:
import pandas as pd
import matplotlib.pyplot as plt

# ==============================
# Step 1: Load & Clean Data
# ==============================
file_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged_Monthly_Data_Cleaned.csv"
df = pd.read_csv(file_path, encoding='utf-8', low_memory=False)

# Convert 'Date' to datetime
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

# Keep only relevant columns
df = df[['Date', 'Daily Device Installs', 'Daily Device Uninstalls']]

# Remove NaN and duplicates
df.dropna(subset=['Date', 'Daily Device Installs', 'Daily Device Uninstalls'], inplace=True)
df.drop_duplicates(inplace=True)

# ==============================
# Step 2: Daily Trend Analysis
# ==============================
daily_summary = df.groupby('Date')[['Daily Device Installs', 'Daily Device Uninstalls']].sum().reset_index()
print("\n===== Daily Summary =====")
print(daily_summary.describe())

# Plot daily trends
plt.figure(figsize=(12,6))
plt.plot(daily_summary['Date'], daily_summary['Daily Device Installs'], label='Daily Installs', marker='o')
plt.plot(daily_summary['Date'], daily_summary['Daily Device Uninstalls'], label='Daily Uninstalls', marker='x')
plt.title('Daily Install & Uninstall Trends')
plt.xlabel('Date')
plt.ylabel('Number of Devices')
plt.xticks(rotation=45)
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()
===== Daily Summary =====
                      Date  Daily Device Installs  Daily Device Uninstalls
count                   30              30.000000                     30.0
mean   2024-07-16 12:00:00            1450.833333                      0.0
min    2024-07-02 00:00:00              14.000000                      0.0
25%    2024-07-09 06:00:00              25.500000                      0.0
50%    2024-07-16 12:00:00            1173.000000                      0.0
75%    2024-07-23 18:00:00            2644.750000                      0.0
max    2024-07-31 00:00:00            4308.000000                      0.0
std                    NaN            1522.512920                      0.0
No description has been provided for this image
In [100]:
# Step 3: Weekly Trend Analysis
# ==============================
df['Year_Week'] = df['Date'].dt.to_period('W').apply(lambda r: r.start_time)
weekly_summary = df.groupby('Year_Week')[['Daily Device Installs', 'Daily Device Uninstalls']].sum().reset_index()

# Plot weekly trends
plt.figure(figsize=(12,6))
plt.plot(weekly_summary['Year_Week'], weekly_summary['Daily Device Installs'], label='Weekly Installs', marker='o')
plt.plot(weekly_summary['Year_Week'], weekly_summary['Daily Device Uninstalls'], label='Weekly Uninstalls', marker='x')
plt.title('Weekly Install & Uninstall Trends')
plt.xlabel('Week Start Date')
plt.ylabel('Number of Devices')
plt.xticks(rotation=45)
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()
No description has been provided for this image
In [101]:
# ==============================
# Step 4: Monthly Trend Analysis (Calendar Month)
# ==============================
df['Year_Month'] = df['Date'].dt.to_period('M').astype(str)
monthly_summary = df.groupby('Year_Month')[['Daily Device Installs', 'Daily Device Uninstalls']].sum().reset_index()

# Plot monthly trends
plt.figure(figsize=(10,5))
plt.bar(monthly_summary['Year_Month'], monthly_summary['Daily Device Installs'], label='Monthly Installs', alpha=0.7)
plt.bar(monthly_summary['Year_Month'], monthly_summary['Daily Device Uninstalls'], label='Monthly Uninstalls', alpha=0.7)
plt.title('Monthly Install & Uninstall Trends')
plt.xlabel('Month')
plt.ylabel('Number of Devices')
plt.xticks(rotation=45)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.legend()
plt.tight_layout()
plt.show()
No description has been provided for this image
In [102]:
# ==============================
# Step 5: Statistical Summary
# ==============================
print("\n===== Statistical Summary =====")
print("Daily Installs & Uninstalls:")
print(daily_summary[['Daily Device Installs', 'Daily Device Uninstalls']].describe())
print("\nWeekly Installs & Uninstalls:")
print(weekly_summary[['Daily Device Installs', 'Daily Device Uninstalls']].describe())
print("\nMonthly Installs & Uninstalls:")
print(monthly_summary[['Daily Device Installs', 'Daily Device Uninstalls']].describe())
===== Statistical Summary =====
Daily Installs & Uninstalls:
       Daily Device Installs  Daily Device Uninstalls
count             122.000000                    122.0
mean             2047.639344                      0.0
std              2176.634012                      0.0
min                 6.000000                      0.0
25%                40.000000                      0.0
50%              1451.000000                      0.0
75%              2899.000000                      0.0
max              8301.000000                      0.0

Weekly Installs & Uninstalls:
       Daily Device Installs  Daily Device Uninstalls
count              19.000000                     19.0
mean            13148.000000                      0.0
std             13572.012771                      0.0
min                55.000000                      0.0
25%              1011.000000                      0.0
50%              7789.000000                      0.0
75%             18626.000000                      0.0
max             41503.000000                      0.0

Monthly Installs & Uninstalls:
       Daily Device Installs  Daily Device Uninstalls
count               4.000000                      4.0
mean            62453.000000                      0.0
std             41559.269291                      0.0
min             13949.000000                      0.0
25%             37709.000000                      0.0
50%             63197.000000                      0.0
75%             87941.000000                      0.0
max            109469.000000                      0.0

Country/region-based usage distribution

Prepare a mapping of country codes to full names

In [ ]:
pip install pycountry
In [3]:
import pandas as pd
import pycountry

# ==============================
# STEP 1: Load your merged CSV
# ==============================
file_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged_Monthly_Data.csv"
df = pd.read_csv(file_path, encoding='utf-8', low_memory=False)

# ==============================
# STEP 2: Remove rows with empty Country
# ==============================
df = df[df['Country'].notna() & (df['Country'].str.strip() != '')]

# ==============================
# STEP 3: Map country codes to full names
# ==============================
def code_to_country(code):
    try:
        return pycountry.countries.get(alpha_2=code.upper()).name
    except:
        return code  # fallback if code not found

df['Country_Full'] = df['Country'].apply(code_to_country)

# ==============================
# STEP 4: Save updated CSV
# ==============================
output_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged_Monthly_Data_Cleaned.csv"
df.to_csv(output_path, index=False, encoding='utf-8')
print(f"✅ Updated CSV saved at:\n{output_path}")

# Optional preview
print(df[['Country', 'Country_Full']].head())
✅ Updated CSV saved at:
C:\Users\hp\Desktop\WEEK 3_DSHub\Merged_Monthly_Data_Cleaned.csv
     Country Country_Full
6171      BG     Bulgaria
6172      BR       Brazil
6173      DE      Germany
6174      HU      Hungary
6175      MY     Malaysia
In [106]:
df.head()
Out[106]:
Date Package name Carrier Daily Device Installs Daily Device Uninstalls Daily Device Upgrades Total User Installs Daily User Installs Daily User Uninstalls Active Device Installs ... Uninstall events Month Android OS Version Country Package Name Daily Crashes Daily ANRs App Version Code Source_File Country_Full
6171 7/2/2024 com.app.grow.greener.plants NaN 6.0 0.0 0.0 0.0 4.0 3.0 3.0 ... 4.0 Install_july NaN BG NaN NaN NaN NaN NaN Bulgaria
6172 7/2/2024 com.app.grow.greener.plants NaN 1.0 0.0 0.0 0.0 1.0 0.0 1.0 ... 0.0 Install_july NaN BR NaN NaN NaN NaN NaN Brazil
6173 7/2/2024 com.app.grow.greener.plants NaN 4.0 0.0 0.0 0.0 3.0 9.0 2.0 ... 9.0 Install_july NaN DE NaN NaN NaN NaN NaN Germany
6174 7/2/2024 com.app.grow.greener.plants NaN 2.0 0.0 0.0 0.0 1.0 0.0 1.0 ... 0.0 Install_july NaN HU NaN NaN NaN NaN NaN Hungary
6175 7/2/2024 com.app.grow.greener.plants NaN 1.0 0.0 0.0 0.0 1.0 0.0 1.0 ... 0.0 Install_july NaN MY NaN NaN NaN NaN NaN Malaysia

5 rows × 22 columns

region-based usage distribution based on installation events

In [19]:
import pandas as pd

# Assuming your output DataFrame is named 'df'
# (replace 'df' with your actual DataFrame name)

output_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged_Monthly_Data_Cleaned.csv"  # 👈 change this to your preferred location

# Save the DataFrame to CSV
df.to_csv(output_path, index=False, encoding='utf-8')

print(f"✅ CSV file successfully saved to:\n{output_path}")
✅ CSV file successfully saved to:
C:\Users\hp\Desktop\WEEK 3_DSHub\Merged_Monthly_Data_Cleaned.csv
In [17]:
df.head()
Out[17]:
country country_full install_events Year_Month Region
0 BG Bulgaria 7.0 2024-02 Other
1 BR Brazil 1.0 2024-02 South America
2 DE Germany 11.0 2024-02 Europe
3 HU Hungary 2.0 2024-02 Other
4 MY Malaysia 1.0 2024-02 Other
In [13]:
# ===========================================
# STEP 1: Import Libraries
# ===========================================
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# ===========================================
# STEP 2: Load Dataset
# ===========================================
file_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\July_data.csv"
df = pd.read_csv(file_path, encoding='utf-8', low_memory=False)
print("✅ File loaded. Shape:", df.shape)

# ===========================================
# STEP 3: Normalize Column Names
# ===========================================
df.columns = df.columns.str.strip().str.replace(" ", "_").str.lower()

# Check available columns
print("\n📋 Columns available:")
print(df.columns.tolist())

# ===========================================
# STEP 4: Diagnose the Date Column
# ===========================================
# Try to find the column that holds date information
possible_dates = [col for col in df.columns if 'date' in col]
print("\n🔍 Possible date columns:", possible_dates)

# If there's a 'date' column, inspect unique formats
date_col = possible_dates[0] if possible_dates else None
if not date_col:
    raise ValueError("No 'date' column found. Please verify your dataset.")

print("\nSample date values:")
print(df[date_col].head(10))

# ===========================================
# STEP 5: Clean and Convert Date
# ===========================================
# Try to convert; if fails, coerce to NaT
df[date_col] = pd.to_datetime(df[date_col], errors='coerce', dayfirst=True)

# If some are still NaT, try parsing as Excel serial or text
if df[date_col].isna().sum() > 0:
    try:
        df[date_col] = pd.to_datetime(df[date_col].astype(float), origin='1899-12-30', unit='D')
    except:
        pass

# Drop rows without valid dates
df = df.dropna(subset=[date_col])
print(f"\n✅ Dates cleaned. Total valid rows: {len(df)}")

# ===========================================
# STEP 6: Extract Month and Year
# ===========================================
df['Year_Month'] = df[date_col].dt.to_period('M').astype(str)

print("\n📆 Unique months found after cleaning:")
print(df['Year_Month'].unique())

# ===========================================
# STEP 7: Clean and Prepare for Analysis
# ===========================================
# Keep only relevant columns
columns_needed = ['country', 'country_full', 'install_events', 'Year_Month']
available_cols = [col for col in columns_needed if col in df.columns]
df = df[available_cols]

# Convert installs to numeric
df['install_events'] = pd.to_numeric(df['install_events'], errors='coerce')
df = df.dropna(subset=['install_events'])
df = df[df['install_events'] > 0]

# ===========================================
# STEP 8: Region Mapping
# ===========================================
region_mapping = {
    "Nigeria": "Africa", "Kenya": "Africa", "Ghana": "Africa", "South Africa": "Africa", "Egypt": "Africa",
    "Germany": "Europe", "France": "Europe", "United Kingdom": "Europe", "Spain": "Europe", "Italy": "Europe",
    "India": "Asia", "China": "Asia", "Japan": "Asia", "Indonesia": "Asia", "Philippines": "Asia", "Singapore": "Asia",
    "United States": "North America", "Canada": "North America", "Mexico": "North America",
    "Brazil": "South America", "Argentina": "South America", "Chile": "South America",
    "Australia": "Oceania", "New Zealand": "Oceania"
}
df['Region'] = df['country_full'].map(region_mapping).fillna('Other')

# ===========================================
# STEP 9: Group by Month & Region
# ===========================================
monthly_region = df.groupby(['Year_Month', 'Region'])['install_events'].sum().reset_index()

# ===========================================
# STEP 10: Validate Month Coverage
# ===========================================
print("\n🗓️ Months appearing in grouped data:")
print(monthly_region['Year_Month'].unique())

# ===========================================
# STEP 11: Visualization
# ===========================================
plt.figure(figsize=(12, 6))
sns.lineplot(data=monthly_region, x='Year_Month', y='install_events', hue='Region', marker='o')
plt.title("July Install Events by Region")
plt.xlabel("Month")
plt.ylabel("Total Install Events")
plt.xticks(rotation=45)
plt.grid(True, linestyle='--', alpha=0.6)
plt.tight_layout()
plt.show()

# ===========================================
# STEP 12: Save Cleaned File
# ===========================================
output_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\Cleaned_Region_Trend_Analysis.csv"
df.to_csv(output_path, index=False)
print(f"\n✅ Cleaned data with proper dates saved to:\n{output_path}")
✅ File loaded. Shape: (2475, 22)

📋 Columns available:
['date', 'package_name', 'carrier', 'daily_device_installs', 'daily_device_uninstalls', 'daily_device_upgrades', 'total_user_installs', 'daily_user_installs', 'daily_user_uninstalls', 'active_device_installs', 'install_events', 'update_events', 'uninstall_events', 'month', 'android_os_version', 'country', 'package_name', 'daily_crashes', 'daily_anrs', 'app_version_code', 'source_file', 'country_full']

🔍 Possible date columns: ['date', 'update_events']

Sample date values:
0    7/2/2024
1    7/2/2024
2    7/2/2024
3    7/2/2024
4    7/2/2024
5    7/2/2024
6    7/2/2024
7    7/3/2024
8    7/3/2024
9    7/3/2024
Name: date, dtype: object

✅ Dates cleaned. Total valid rows: 293

📆 Unique months found after cleaning:
['2024-02' '2024-03' '2024-04' '2024-05' '2024-06' '2024-07' '2024-08'
 '2024-09' '2024-10' '2024-11' '2024-12']

🗓️ Months appearing in grouped data:
['2024-02' '2024-03' '2024-04' '2024-05' '2024-06' '2024-07' '2024-08'
 '2024-09' '2024-10' '2024-11' '2024-12']
No description has been provided for this image
✅ Cleaned data with proper dates saved to:
C:\Users\hp\Desktop\WEEK 3_DSHub\Cleaned_Region_Trend_Analysis.csv

App Performance Analysis

In [22]:
import pandas as pd
import matplotlib.pyplot as plt

# === Load CSV files ===
df_2024 = pd.read_csv(r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\App_Performance_2024.csv")
df_2025 = pd.read_csv(r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\App_Performance_2025.csv")

# === Ensure Date column is datetime ===
df_2024['Date'] = pd.to_datetime(df_2024['Date'], errors='coerce')
df_2025['Date'] = pd.to_datetime(df_2025['Date'], errors='coerce')

# === Add Year & Month columns ===
df_2024['Year'] = df_2024['Date'].dt.year
df_2024['Month'] = df_2024['Date'].dt.month_name()

df_2025['Year'] = df_2025['Date'].dt.year
df_2025['Month'] = df_2025['Date'].dt.month_name()

# Confirm structure
print(df_2024.head())
print(df_2025.head())
        Date                 Package Name  Android OS Version  Daily Crashes  \
0 2024-07-05  com.app.grow.greener.plants                 NaN            0.0   
1 2024-07-08  com.app.grow.greener.plants                 NaN            0.0   
2 2024-07-10  com.app.grow.greener.plants                 NaN            1.0   
3 2024-07-11  com.app.grow.greener.plants                 NaN            1.0   
4 2024-07-15  com.app.grow.greener.plants                 NaN            0.0   

   Daily ANRs Month                                      Device  Year  
0         1.0  July        TECNO TECNO-KI7 (TECNO SPARK 10 Pro)  2024  
1         1.0  July                    samsung a13 (Galaxy A13)  2024  
2         0.0  July                    HONOR HNVNE-M (HONOR X6)  2024  
3         0.0  July  Lenovo X306X (Lenovo Tab M10 HD (2nd Gen))  2024  
4         1.0  July           TECNO TECNO-KG6P (TECNO SPARK 8T)  2024  
        Date                 Package Name Android OS Version  Daily Crashes  \
0 2025-02-01  com.app.grow.greener.plants        Android 6.0            0.0   
1 2025-02-01  com.app.grow.greener.plants        Android 7.1           15.0   
2 2025-02-01  com.app.grow.greener.plants        Android 8.0            2.0   
3 2025-02-01  com.app.grow.greener.plants        Android 8.1            1.0   
4 2025-02-01  com.app.grow.greener.plants          Android 9           13.0   

   Daily ANRs     Month Device  Year  
0         8.0  February    NaN  2025  
1         6.0  February    NaN  2025  
2         1.0  February    NaN  2025  
3         6.0  February    NaN  2025  
4         8.0  February    NaN  2025  
In [23]:
import pandas as pd
import re

# Load your CSV file
df = pd.read_csv(r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\App_Performance_2024.csv")

# Define a function to clean and extract device names
def clean_device_name(device):
    if pd.isna(device):
        return ""  # avoid NaN output
    # Try to extract text within parentheses
    match = re.search(r"\((.*?)\)", device)
    if match:
        cleaned = match.group(1).strip()
    else:
        # If no parentheses, use the part before or entire text
        cleaned = device.strip()
    # Remove extra spaces and ensure distinct clean format
    cleaned = re.sub(r"\s+", " ", cleaned)
    return cleaned

# Apply the function to the Device column
df["Cleaned_Device"] = df["Device"].apply(clean_device_name)

# Drop duplicates and reset index (optional, for analysis)
df = df.drop_duplicates(subset=["Cleaned_Device"]).reset_index(drop=True)

# Replace any remaining NaN values with blank strings
df["Cleaned_Device"] = df["Cleaned_Device"].fillna("")

# Save cleaned data to a new file
df.to_csv("Cleaned_Device_Data.csv", index=False)

print("✅ Cleaning complete! File saved as 'Cleaned_Device_Data.csv'")
print(df[["Device", "Cleaned_Device"]].head())
✅ Cleaning complete! File saved as 'Cleaned_Device_Data.csv'
                                       Device              Cleaned_Device
0        TECNO TECNO-KI7 (TECNO SPARK 10 Pro)          TECNO SPARK 10 Pro
1                    samsung a13 (Galaxy A13)                  Galaxy A13
2                    HONOR HNVNE-M (HONOR X6)                    HONOR X6
3  Lenovo X306X (Lenovo Tab M10 HD (2nd Gen))  Lenovo Tab M10 HD (2nd Gen
4           TECNO TECNO-KG6P (TECNO SPARK 8T)              TECNO SPARK 8T
In [26]:
import pandas as pd
import re

# Load your CSV file
df = pd.read_csv(r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\App_Performance_2025.csv")

# Define a function to clean and extract device names
def clean_device_name(device):
    if pd.isna(device):
        return ""  # avoid NaN output
    # Try to extract text within parentheses
    match = re.search(r"\((.*?)\)", device)
    if match:
        cleaned = match.group(1).strip()
    else:
        # If no parentheses, use the part before or entire text
        cleaned = device.strip()
    # Remove extra spaces and ensure distinct clean format
    cleaned = re.sub(r"\s+", " ", cleaned)
    return cleaned

# Apply the function to the Device column
df["Cleaned_Device"] = df["Device"].apply(clean_device_name)

# Drop duplicates and reset index (optional, for analysis)
df = df.drop_duplicates(subset=["Cleaned_Device"]).reset_index(drop=True)

# Replace any remaining NaN values with blank strings
df["Cleaned_Device"] = df["Cleaned_Device"].fillna("")

# Save cleaned data to a new file
df.to_csv("Cleaned_Device_Data.csv", index=False)

print("✅ Cleaning complete! File saved as 'Cleaned_Device_Data.csv'")
print(df[["Device", "Cleaned_Device"]].head())
✅ Cleaning complete! File saved as 'Cleaned_Device_Data.csv'
          Device Cleaned_Device
0            NaN               
1           1902           1902
2        HWDUB-Q        HWDUB-Q
3  Infinix-X6516  Infinix-X6516
4  Infinix-X665C  Infinix-X665C

Retention/Chun

In [27]:
def retention_churn_analysis(df, year):
    df = df.copy()
    df = df.sort_values(by='Date')
    
    # Group all devices by month
    month_device = df.groupby('Month')['Device'].apply(set).reset_index()
    
    summary = []
    prev_devices = set()
    
    for i, row in month_device.iterrows():
        month = row['Month']
        current_devices = row['Device']
        
        new_users = len(current_devices - prev_devices)
        retained_users = len(current_devices & prev_devices)
        churned_users = len(prev_devices - current_devices)
        total_active = len(current_devices)
        
        retention_rate = (retained_users / (retained_users + new_users) * 100) if (retained_users + new_users) > 0 else 0
        churn_rate = (churned_users / (len(prev_devices) + 1e-9) * 100) if len(prev_devices) > 0 else 0
        
        summary.append({
            'Year': year,
            'Month': month,
            'Total Active Devices': total_active,
            'New Devices': new_users,
            'Retained Devices': retained_users,
            'Churned Devices': churned_users,
            'Retention Rate (%)': round(retention_rate, 2),
            'Churn Rate (%)': round(churn_rate, 2)
        })
        
        prev_devices = current_devices
    
    return pd.DataFrame(summary)
In [28]:
retention_2024 = retention_churn_analysis(df_2024, 2024)
retention_2025 = retention_churn_analysis(df_2025, 2025)

# Combine summaries
combined_retention = pd.concat([retention_2024, retention_2025])
combined_retention.reset_index(drop=True, inplace=True)
combined_retention
Out[28]:
Year Month Total Active Devices New Devices Retained Devices Churned Devices Retention Rate (%) Churn Rate (%)
0 2024 July 782 782 0 0 0.0 0.0
1 2025 February 1 1 0 0 0.0 0.0
2 2025 May 156 156 0 1 0.0 100.0
In [31]:
df_2024 = pd.read_csv(r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\App_Performance_2024.csv", encoding='utf-8', parse_dates=['Date'], dayfirst=True)
df_2025 = pd.read_csv(r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\App_Performance_2025.csv", encoding='utf-8', parse_dates=['Date'], dayfirst=True)
In [32]:
print(df_2024['Date'].head(10))
0    2024-07-05
1    2024-07-08
2    2024-07-10
3    2024-07-11
4    2024-07-15
5    2024-07-15
6    2024-07-15
7    2024-07-15
8    2024-07-15
9    2024-07-15
Name: Date, dtype: object

monthly-level (July) retention/churn analysis based on Device activity

In [47]:
import pandas as pd

df_2024 = pd.read_csv(
    r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\App_Performance_2024.csv",
    encoding='utf-8'
)

# Show the first 10 unique date entries and column names
print("📋 Columns:", df_2024.columns.tolist())
print("\n🔍 Sample date values:")
print(df_2024['Date'].head(10))
📋 Columns: ['Date', 'Package Name', 'Android OS Version', 'Daily Crashes', 'Daily ANRs', 'Month', 'Device', 'Year']

🔍 Sample date values:
0    2024-07-05
1    2024-07-08
2    2024-07-10
3    2024-07-11
4    2024-07-15
5    2024-07-15
6    2024-07-15
7    2024-07-15
8    2024-07-15
9    2024-07-15
Name: Date, dtype: object
In [49]:
import pandas as pd
import matplotlib.pyplot as plt

# Load dataset
df_2024 = pd.read_csv(
    r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\App_Performance_2024.csv",
    encoding='utf-8'
)

# Convert Date column to datetime
df_2024['Date'] = pd.to_datetime(df_2024['Date'], errors='coerce')

#  Filter only July 2024 data
df_july_2024 = df_2024[(df_2024['Date'].dt.month == 7) & (df_2024['Date'].dt.year == 2024)]

# Drop missing or invalid device entries
df_july_2024 = df_july_2024.dropna(subset=['Device'])

# Calculate active devices per day
daily_devices = df_july_2024.groupby('Date')['Device'].nunique().reset_index(name='Active_Devices')

# Calculate Retention Rate (% change from previous day)
daily_devices['Retention_Rate_%'] = daily_devices['Active_Devices'].pct_change().fillna(0) * 100

# Calculate simple churn (negative of retention if drop)
daily_devices['Churn_Rate_%'] = daily_devices['Retention_Rate_%'].apply(lambda x: abs(x) if x < 0 else 0)

# Display summary
print("\n Daily Retention & Churn (July 2024):")
print(daily_devices.describe())

# Plot
plt.figure(figsize=(10,6))
plt.plot(daily_devices['Date'], daily_devices['Active_Devices'], marker='o', label='Active Devices', linewidth=2)
plt.title(" User Retention & Device Activity - July 2024")
plt.xlabel("Date")
plt.ylabel("Number of Active Devices")
plt.xticks(rotation=45)
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()
 Daily Retention & Churn (July 2024):
                                Date  Active_Devices  Retention_Rate_%  \
count                             21       21.000000         21.000000   
mean   2024-07-20 05:42:51.428571392      133.476190         56.532700   
min              2024-07-05 00:00:00        1.000000        -11.173184   
25%              2024-07-16 00:00:00       31.000000         -7.291667   
50%              2024-07-21 00:00:00      178.000000          0.000000   
75%              2024-07-26 00:00:00      195.000000         19.018405   
max              2024-07-31 00:00:00      238.000000        500.000000   
std                              NaN       87.277499        141.029997   

       Churn_Rate_%  
count     21.000000  
mean       2.763988  
min        0.000000  
25%        0.000000  
50%        0.000000  
75%        7.291667  
max       11.173184  
std        4.323564  
No description has been provided for this image
In [56]:
import pandas as pd
import matplotlib.pyplot as plt

# Load 2025 dataset
df_2025 = pd.read_csv(
    r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\App_Performance_2025.csv",
    encoding='utf-8'
)

# Convert 'Date' to datetime (format: Month-Day-Year)
df_2025['Date'] = pd.to_datetime(df_2025['Date'], errors='coerce')

# Drop invalid dates and devices
df_2025 = df_2025.dropna(subset=['Date', 'Device'])

# Add Month-Year column
df_2025['YearMonth'] = df_2025['Date'].dt.to_period('M')

print(" Unique Year-Months in 2025 data:", df_2025['YearMonth'].unique())

# Group by Date to count active devices per day
daily_devices_2025 = df_2025.groupby('Date')['Device'].nunique().reset_index(name='Active_Devices')

# Compute Retention and Churn rates
daily_devices_2025['Retention_Rate_%'] = daily_devices_2025['Active_Devices'].pct_change().fillna(0) * 100
daily_devices_2025['Churn_Rate_%'] = daily_devices_2025['Retention_Rate_%'].apply(lambda x: abs(x) if x < 0 else 0)

# --- Statistics Summary ---
print("\n Daily Retention & Churn Statistics (2025):")
print(daily_devices_2025.describe())

# --- Visualization ---
plt.figure(figsize=(10,6))
plt.plot(daily_devices_2025['Date'], daily_devices_2025['Active_Devices'], marker='o', label='Active Devices', linewidth=2, color='blue')
plt.title(" User Retention & Device Activity - 2025 ( May)")
plt.xlabel("Date")
plt.ylabel("Number of Active Devices")
plt.xticks(rotation=45)
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()
 Unique Year-Months in 2025 data: <PeriodArray>
['2025-05']
Length: 1, dtype: period[M]

 Daily Retention & Churn Statistics (2025):
                      Date  Active_Devices  Retention_Rate_%  Churn_Rate_%
count                   11       11.000000         11.000000     11.000000
mean   2025-05-06 00:00:00       25.454545         15.118602      6.090092
min    2025-05-01 00:00:00       10.000000        -41.176471      0.000000
25%    2025-05-03 12:00:00       17.500000         -7.644110      0.000000
50%    2025-05-06 00:00:00       19.000000          1.923077      0.000000
75%    2025-05-08 12:00:00       27.500000         34.803922      7.644110
max    2025-05-11 00:00:00       53.000000         80.000000     41.176471
std                    NaN       14.514569         37.347213     12.376602
No description has been provided for this image

APP PERFORMANCE ANALYSIS

In [58]:
import pandas as pd
import matplotlib.pyplot as plt

# Load dataset
df_2024 = pd.read_csv(
    r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\App_Performance_2024.csv",
    encoding='utf-8'
)

# Convert Date column to datetime
df_2024['Date'] = pd.to_datetime(df_2024['Date'], errors='coerce')

# Filter July 2024
df_july_2024 = df_2024[(df_2024['Date'].dt.month == 7) & (df_2024['Date'].dt.year == 2024)]

# Group by Date and summarize performance
stability_stats = df_july_2024.groupby('Date').agg({
    'Daily ANRs': 'sum',
    'Daily Crashes': 'sum',
    'Device': 'nunique'
}).reset_index()

stability_stats['Crash_Rate_%'] = (stability_stats['Daily Crashes'] / stability_stats['Device']) * 100
stability_stats['ANR_Rate_%'] = (stability_stats['Daily ANRs'] / stability_stats['Device']) * 100

# Display summary statistics
print("\n📊 App Performance Metrics (July 2024):")
print(stability_stats.describe())

# Plot trends
plt.figure(figsize=(10,6))
plt.plot(stability_stats['Date'], stability_stats['Crash_Rate_%'], marker='o', label='Crash Rate (%)', color='red')
plt.plot(stability_stats['Date'], stability_stats['ANR_Rate_%'], marker='o', label='ANR Rate (%)', color='orange')
plt.title(" App Performance(Daily) - July 2024")
plt.xlabel("Date")
plt.ylabel("Rate (%)")
plt.xticks(rotation=45)
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()
📊 App Performance Metrics (July 2024):
                                Date  Daily ANRs  Daily Crashes      Device  \
count                             21   21.000000      21.000000   21.000000   
mean   2024-07-20 05:42:51.428571392  113.047619     162.904762  133.476190   
min              2024-07-05 00:00:00    0.000000       0.000000    1.000000   
25%              2024-07-16 00:00:00   27.000000      32.000000   31.000000   
50%              2024-07-21 00:00:00  141.000000     199.000000  178.000000   
75%              2024-07-26 00:00:00  177.000000     254.000000  195.000000   
max              2024-07-31 00:00:00  224.000000     386.000000  238.000000   
std                              NaN   76.494756     116.267753   87.277499   

       Crash_Rate_%  ANR_Rate_%  
count     21.000000   21.000000  
mean     101.192355   78.437533  
min        0.000000    0.000000  
25%      100.000000   79.354839  
50%      110.695187   85.937500  
75%      126.380368   88.995215  
max      162.184874  100.000000  
std       45.703656   27.407571  
No description has been provided for this image
In [57]:
import pandas as pd
import matplotlib.pyplot as plt

# Load dataset again for clarity
df_2025 = pd.read_csv(
    r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\App_Performance_2025.csv",
    encoding='utf-8'
)

# Convert Date column
df_2025['Date'] = pd.to_datetime(df_2025['Date'], errors='coerce')

# Filter only valid data
df_2025 = df_2025.dropna(subset=['Date', 'Device'])

# --- Group by Date and compute metrics ---
stability_stats_2025 = df_2025.groupby('Date').agg({
    'Daily ANRs': 'sum',
    'Daily Crashes': 'sum',
    'Device': 'nunique'
}).reset_index()

# Compute performance rates
stability_stats_2025['Crash_Rate_%'] = (stability_stats_2025['Daily Crashes'] / stability_stats_2025['Device']) * 100
stability_stats_2025['ANR_Rate_%'] = (stability_stats_2025['Daily ANRs'] / stability_stats_2025['Device']) * 100

# --- Statistics Summary ---
print("\n App Performance Statistics (2025):")
print(stability_stats_2025.describe())

# --- Visualization ---
plt.figure(figsize=(10,6))
plt.plot(stability_stats_2025['Date'], stability_stats_2025['Crash_Rate_%'], marker='o', label='Crash Rate (%)', color='red')
plt.plot(stability_stats_2025['Date'], stability_stats_2025['ANR_Rate_%'], marker='o', label='ANR Rate (%)', color='orange')
plt.title(" App Performance(Daily) - 2025 ( May)")
plt.xlabel("Date")
plt.ylabel("Rate (%)")
plt.xticks(rotation=45)
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()
 App Performance Statistics (2025):
                      Date  Daily ANRs  Daily Crashes     Device  \
count                   11   11.000000      11.000000  11.000000   
mean   2025-05-06 00:00:00   20.454545      27.818182  25.454545   
min    2025-05-01 00:00:00   11.000000       3.000000  10.000000   
25%    2025-05-03 12:00:00   15.500000      10.000000  17.500000   
50%    2025-05-06 00:00:00   18.000000      12.000000  19.000000   
75%    2025-05-08 12:00:00   25.500000      36.000000  27.500000   
max    2025-05-11 00:00:00   34.000000      86.000000  53.000000   
std                    NaN    8.430464      31.250018  14.514569   

       Crash_Rate_%  ANR_Rate_%  
count     11.000000   11.000000  
mean      84.849635   95.987084  
min       30.000000   28.846154  
25%       52.661064   65.408805  
50%       61.111111   85.714286  
75%      116.832175  110.882353  
max      167.647059  188.235294  
std       51.398566   48.984473  
No description has been provided for this image

DASHBOARDS

In [ ]:
#Retention & Churn Dashboard( July 2024)
In [20]:
# ======================================
#  USER RETENTION VS CHURN ANALYSIS
# Dataset: Final_review_analysis_cleaned.csv
# ======================================

import pandas as pd
import numpy as np
import plotly.express as px

# -------------------------------
#  LOAD AND CLEAN DATA
# -------------------------------
# Load CSV
df = pd.read_csv(r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\Final_Review_Analysis_Cleaned.csv")

# Keep only relevant columns
df = df[['Device', 'Review Submit Date and Time', 'Review Last Update Date and Time', 'Star Rating']]

# Rename columns for simplicity
df.columns = ['Device', 'Submit_Date', 'Update_Date', 'Star_Rating']

# Clean & convert date columns to datetime
df['Submit_Date'] = pd.to_datetime(df['Submit_Date'], errors='coerce')
df['Update_Date'] = pd.to_datetime(df['Update_Date'], errors='coerce')

# Drop rows with invalid or missing dates
df = df.dropna(subset=['Submit_Date', 'Update_Date'])

# Extract only the date part (no time)
df['Submit_Date'] = df['Submit_Date'].dt.date
df['Update_Date'] = df['Update_Date'].dt.date

print(" Data cleaned successfully!")
print("Columns:", df.columns.tolist())
print("\nSample:")
print(df.head())

# -------------------------------
#  DEFINE ACTIVE vs INACTIVE USERS
# -------------------------------
# Rule:
# If a device has a review update on a given day → Active
# If a device has no update activity for 7+ days → Inactive

# Create daily activity summary
daily_activity = df.groupby('Update_Date')['Device'].nunique().reset_index()
daily_activity.columns = ['Date', 'Active_Users']

# Sort by date
daily_activity = daily_activity.sort_values('Date')

# Compute churn (drop in active users)
daily_activity['Previous_Active'] = daily_activity['Active_Users'].shift(1)
daily_activity['Churn'] = daily_activity['Previous_Active'] - daily_activity['Active_Users']

# Any negative churn (growth) should be 0
daily_activity['Churn'] = daily_activity['Churn'].apply(lambda x: x if x > 0 else 0)

# Calculate retention rate (percentage of users retained)
daily_activity['Retention_Rate'] = (
    daily_activity['Active_Users'] / daily_activity['Previous_Active'] * 100
).round(2)

# Fill first day NA values
daily_activity.fillna({'Previous_Active': 0, 'Churn': 0, 'Retention_Rate': 100}, inplace=True)

# -------------------------------
#  ADD ACTIVE/INACTIVE USER STATUS
# -------------------------------
# Approximation: inactive users = total devices - active on that day
total_devices = df['Device'].nunique()
daily_activity['Inactive_Users'] = total_devices - daily_activity['Active_Users']

print("\n Daily Retention & Churn Data:")
print(daily_activity.head())

# -------------------------------
#  VISUALIZE WITH DASHBOARD-LIKE PLOTS
# -------------------------------
# Plot 1: Active vs Inactive Users
fig_activity = px.line(
    daily_activity,
    x='Date',
    y=['Active_Users', 'Inactive_Users'],
    title=' Daily Active vs Inactive Users',
    markers=True
)
fig_activity.update_layout(xaxis_title='Date', yaxis_title='User Count')
fig_activity.show()

# Plot 2: Retention and Churn Trends
fig_retention = px.bar(
    daily_activity,
    x='Date',
    y='Retention_Rate',
    title=' Daily Retention Rate (%)',
    color='Retention_Rate',
    color_continuous_scale='Viridis'
)
fig_retention.update_layout(xaxis_title='Date', yaxis_title='Retention (%)')
fig_retention.show()

fig_churn = px.line(
    daily_activity,
    x='Date',
    y='Churn',
    title=' Daily User Churn Count',
    markers=True,
    line_shape='spline'
)
fig_churn.update_layout(xaxis_title='Date', yaxis_title='Churn Count')
fig_churn.show()

# -------------------------------
#  STATISTICAL SUMMARY
# -------------------------------
print("\n Statistical Summary:")
print(daily_activity[['Active_Users', 'Inactive_Users', 'Churn', 'Retention_Rate']].describe())
print(f"\nTotal Unique Devices: {total_devices}")
 Data cleaned successfully!
Columns: ['Device', 'Submit_Date', 'Update_Date', 'Star_Rating']

Sample:
      Device Submit_Date Update_Date  Star_Rating
0     RE588E  2024-07-02  2024-07-02          1.0
1  RMX1993L1  2024-07-02  2024-07-02          1.0
2       a04e  2024-07-05  2024-07-05          5.0
3    mdh15lm  2024-07-08  2024-07-08          5.0
4        a10  2024-07-15  2024-07-15          5.0

 Daily Retention & Churn Data:
         Date  Active_Users  Previous_Active  Churn  Retention_Rate  \
0  2024-07-02             2              0.0    0.0          100.00   
1  2024-07-05             1              2.0    1.0           50.00   
2  2024-07-08             1              1.0    0.0          100.00   
3  2024-07-15            14              1.0    0.0         1400.00   
4  2024-07-16            50             14.0    0.0          357.14   

   Inactive_Users  
0             473  
1             474  
2             474  
3             461  
4             425  
 Statistical Summary:
       Active_Users  Inactive_Users       Churn  Retention_Rate
count     10.000000       10.000000   10.000000        10.00000
mean      77.500000      397.500000   12.600000       262.08800
std       77.386835       77.386835   31.690167       410.21605
min        1.000000      279.000000    0.000000        41.52000
25%        5.000000      323.250000    0.000000        90.43000
50%       60.500000      414.500000    0.000000       107.99000
75%      151.750000      470.000000    0.750000       192.25000
max      196.000000      474.000000  100.000000      1400.00000

Total Unique Devices: 475

FRUT GARDEN INSIGHT OVERVIEW DASHBOARD

In [21]:
pip install dash plotly pandas
Requirement already satisfied: dash in c:\users\hp\appdata\local\anaconda3\lib\site-packages (3.2.0)
Requirement already satisfied: plotly in c:\users\hp\appdata\local\anaconda3\lib\site-packages (5.24.1)
Requirement already satisfied: pandas in c:\users\hp\appdata\local\anaconda3\lib\site-packages (2.3.3)
Requirement already satisfied: Flask<3.2,>=1.0.4 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from dash) (3.1.0)
Requirement already satisfied: Werkzeug<3.2 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from dash) (3.1.3)
Requirement already satisfied: importlib-metadata in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from dash) (8.5.0)
Requirement already satisfied: typing-extensions>=4.1.1 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from dash) (4.12.2)
Requirement already satisfied: requests in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from dash) (2.32.3)
Requirement already satisfied: retrying in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from dash) (1.4.2)
Requirement already satisfied: nest-asyncio in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from dash) (1.6.0)
Requirement already satisfied: setuptools in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from dash) (72.1.0)
Requirement already satisfied: Jinja2>=3.1.2 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from Flask<3.2,>=1.0.4->dash) (3.1.6)
Requirement already satisfied: itsdangerous>=2.2 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from Flask<3.2,>=1.0.4->dash) (2.2.0)
Requirement already satisfied: click>=8.1.3 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from Flask<3.2,>=1.0.4->dash) (8.1.8)
Requirement already satisfied: blinker>=1.9 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from Flask<3.2,>=1.0.4->dash) (1.9.0)
Requirement already satisfied: MarkupSafe>=2.1.1 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from Werkzeug<3.2->dash) (3.0.2)
Requirement already satisfied: tenacity>=6.2.0 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from plotly) (9.0.0)
Requirement already satisfied: packaging in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from plotly) (24.2)
Requirement already satisfied: numpy>=1.26.0 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from pandas) (2.1.3)
Requirement already satisfied: python-dateutil>=2.8.2 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from pandas) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from pandas) (2024.1)
Requirement already satisfied: tzdata>=2022.7 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from pandas) (2025.2)
Requirement already satisfied: colorama in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from click>=8.1.3->Flask<3.2,>=1.0.4->dash) (0.4.6)
Requirement already satisfied: six>=1.5 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)
Requirement already satisfied: zipp>=3.20 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from importlib-metadata->dash) (3.21.0)
Requirement already satisfied: charset-normalizer<4,>=2 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from requests->dash) (3.3.2)
Requirement already satisfied: idna<4,>=2.5 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from requests->dash) (3.7)
Requirement already satisfied: urllib3<3,>=1.21.1 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from requests->dash) (2.3.0)
Requirement already satisfied: certifi>=2017.4.17 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from requests->dash) (2025.10.5)
Note: you may need to restart the kernel to use updated packages.
In [1]:
import pandas as pd
import plotly.express as px
from dash import Dash, html, dcc

# =============================
# Load and prepare 2024 dataset
# =============================
df_2024 = pd.read_csv(
    r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\App_Performance_2024.csv",
    encoding='utf-8'
)
df_2024['Date'] = pd.to_datetime(df_2024['Date'], errors='coerce')
df_2024 = df_2024.dropna(subset=['Date', 'Device'])

# --- July 2024 Retention ---
df_july_2024 = df_2024[(df_2024['Date'].dt.month == 7) & (df_2024['Date'].dt.year == 2024)]
daily_devices_2024 = df_july_2024.groupby('Date')['Device'].nunique().reset_index(name='Active_Devices')
daily_devices_2024['Retention_Rate_%'] = daily_devices_2024['Active_Devices'].pct_change().fillna(0) * 100
daily_devices_2024['Churn_Rate_%'] = daily_devices_2024['Retention_Rate_%'].apply(lambda x: abs(x) if x < 0 else 0)

# --- July 2024 Stability ---
stability_stats = df_july_2024.groupby('Date').agg({
    'Daily ANRs': 'sum',
    'Daily Crashes': 'sum',
    'Device': 'nunique'
}).reset_index()
stability_stats['Crash_Rate_%'] = (stability_stats['Daily Crashes'] / stability_stats['Device']) * 100
stability_stats['ANR_Rate_%'] = (stability_stats['Daily ANRs'] / stability_stats['Device']) * 100

# =============================
# Load and prepare 2025 dataset
# =============================
df_2025 = pd.read_csv(
    r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\App_Performance_2025.csv",
    encoding='utf-8'
)
df_2025['Date'] = pd.to_datetime(df_2025['Date'], errors='coerce')
df_2025 = df_2025.dropna(subset=['Date', 'Device'])
daily_devices_2025 = df_2025.groupby('Date')['Device'].nunique().reset_index(name='Active_Devices')
daily_devices_2025['Retention_Rate_%'] = daily_devices_2025['Active_Devices'].pct_change().fillna(0) * 100
daily_devices_2025['Churn_Rate_%'] = daily_devices_2025['Retention_Rate_%'].apply(lambda x: abs(x) if x < 0 else 0)

# =============================
# Create Visualizations
# =============================
fig1 = px.line(
    daily_devices_2024,
    x='Date', y='Active_Devices',
    title=" Active Devices - July 2024",
    markers=True,
    line_shape='linear',
    color_discrete_sequence=['#1f77b4']
)
fig1.update_traces(line=dict(width=4))

fig2 = px.line(
    stability_stats,
    x='Date', y=['Crash_Rate_%', 'ANR_Rate_%'],
    title="⚙️ App Stability Metrics - July 2024",
    markers=True,
    color_discrete_sequence=['#d62728', '#ff7f0e']
)
fig2.update_traces(line=dict(width=4))

fig3 = px.line(
    daily_devices_2025,
    x='Date', y='Active_Devices',
    title="📈 Active Devices - 2025 (All Months)",
    markers=True,
    color_discrete_sequence=['#2ca02c']
)
fig3.update_traces(line=dict(width=4))

# =============================
# Build Dashboard Layout
# =============================
app = Dash(__name__)
app.title = "App Performance Dashboard"

app.layout = html.Div([
    html.H1(" App Performance Dashboard", style={'textAlign': 'center', 'color': '#003366'}),

    html.Div([
        html.H3("User Retention & Activity (2024 July)", style={'color': '#1f77b4'}),
        dcc.Graph(figure=fig1)
    ], style={'margin': '20px'}),

    html.Div([
        html.H3("App Stability (Crash & ANR Rates)", style={'color': '#d62728'}),
        dcc.Graph(figure=fig2)
    ], style={'margin': '20px'}),

    html.Div([
        html.H3("User Retention & Activity (2025)", style={'color': '#2ca02c'}),
        dcc.Graph(figure=fig3)
    ], style={'margin': '20px'})
])

# =============================
# Run the Dashboard
# =============================
if __name__ == "__main__":
    app.run(debug=True)
In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Set style
sns.set_style("whitegrid")
plt.rcParams['figure.dpi'] = 300

print("="*70)
print("BATCH 1: DATA LOADING & TIME SERIES ANALYSIS")
print("="*70)

# Load the data
df = pd.read_csv(r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\Combined_App_Performance.csv")

# Check available columns
print("\nAvailable columns in dataset:")
print(df.columns.tolist())

# Strip whitespace from column names
df.columns = df.columns.str.strip()

# Identify the columns we need
required_base = ['Daily ANRs', 'Android OS Version', 'Daily Crashes']
date_col = None
device_col = None

# Find date column
for col in df.columns:
    if 'date' in col.lower():
        date_col = col
        break

# Find device column
for col in df.columns:
    if 'device' in col.lower():
        device_col = col
        break

print(f"\nIdentified columns:")
print(f"  Date column: {date_col}")
print(f"  Device column: {device_col}")
print(f"  Required columns: {required_base}")

# Select only needed columns
columns_to_keep = required_base.copy()
if date_col:
    columns_to_keep.insert(0, date_col)
if device_col:
    columns_to_keep.append(device_col)

# Keep only required columns and drop the rest
df = df[columns_to_keep].copy()

print(f"\nColumns retained for analysis: {df.columns.tolist()}")
print(f"Original dataset size: {len(df):,} records")

# Data preprocessing
if date_col:
    df[date_col] = pd.to_datetime(df[date_col], errors='coerce')
    df = df.dropna(subset=[date_col])
    df = df.rename(columns={date_col: 'Date'})

df['Daily Crashes'] = pd.to_numeric(df['Daily Crashes'], errors='coerce')
df['Daily ANRs'] = pd.to_numeric(df['Daily ANRs'], errors='coerce')
df = df.dropna(subset=['Daily Crashes', 'Daily ANRs'])

if device_col:
    df = df.rename(columns={device_col: 'Device'})

print(f"Cleaned dataset size: {len(df):,} records")
print(f"\nData date range: {df['Date'].min()} to {df['Date'].max()}")

# Display sample of cleaned data
print("\nSample of cleaned data (first 5 rows):")
print(df.head())

print("\nBasic statistics:")
print(df[['Daily Crashes', 'Daily ANRs']].describe())

# ===========================================================================
# A. HIGH-LEVEL STABILITY METRICS
# ===========================================================================

print("\n" + "="*70)
print("A. HIGH-LEVEL STABILITY METRICS")
print("="*70)

# 1. TIME SERIES ANALYSIS: Daily Crashes and ANRs aggregated by date
print("\n1. TIME SERIES ANALYSIS - Daily Crashes and ANRs")
print("-" * 70)

daily_agg = df.groupby('Date').agg({
    'Daily Crashes': 'sum',
    'Daily ANRs': 'sum'
}).reset_index().sort_values('Date')

print(f"\nTime Period: {daily_agg['Date'].min().strftime('%Y-%m-%d')} to {daily_agg['Date'].max().strftime('%Y-%m-%d')}")
print(f"Total Days: {len(daily_agg)}")
print(f"\nTotal Crashes (entire period): {daily_agg['Daily Crashes'].sum():,.0f}")
print(f"Total ANRs (entire period): {daily_agg['Daily ANRs'].sum():,.0f}")
print(f"Average Daily Crashes: {daily_agg['Daily Crashes'].mean():.2f}")
print(f"Average Daily ANRs: {daily_agg['Daily ANRs'].mean():.2f}")
print(f"Max Daily Crashes: {daily_agg['Daily Crashes'].max():.0f} on {daily_agg.loc[daily_agg['Daily Crashes'].idxmax(), 'Date'].strftime('%Y-%m-%d')}")
print(f"Max Daily ANRs: {daily_agg['Daily ANRs'].max():.0f} on {daily_agg.loc[daily_agg['Daily ANRs'].idxmax(), 'Date'].strftime('%Y-%m-%d')}")

# Create time series visualization
fig, axes = plt.subplots(2, 1, figsize=(15, 9))

# Crashes over time
axes[0].plot(daily_agg['Date'], daily_agg['Daily Crashes'], 
             marker='o', linewidth=2.5, markersize=5, color='#e74c3c', 
             markerfacecolor='white', markeredgewidth=2)
axes[0].fill_between(daily_agg['Date'], daily_agg['Daily Crashes'], 
                      alpha=0.3, color='#e74c3c')
axes[0].set_title('Daily Crashes Over Time (2024-2025)', 
                  fontsize=15, fontweight='bold', pad=15)
axes[0].set_ylabel('Total Daily Crashes', fontsize=12, fontweight='bold')
axes[0].grid(True, alpha=0.4, linestyle='--')
axes[0].tick_params(axis='x', rotation=45, labelsize=10)
axes[0].tick_params(axis='y', labelsize=10)

# Add mean line
mean_crashes = daily_agg['Daily Crashes'].mean()
axes[0].axhline(y=mean_crashes, color='red', linestyle='--', 
                linewidth=2, alpha=0.7, label=f'Mean: {mean_crashes:.1f}')
axes[0].legend(loc='upper right', fontsize=10)

# ANRs over time
axes[1].plot(daily_agg['Date'], daily_agg['Daily ANRs'], 
             marker='s', linewidth=2.5, markersize=5, color='#f39c12',
             markerfacecolor='white', markeredgewidth=2)
axes[1].fill_between(daily_agg['Date'], daily_agg['Daily ANRs'], 
                      alpha=0.3, color='#f39c12')
axes[1].set_title('Daily ANRs Over Time (2024-2025)', 
                  fontsize=15, fontweight='bold', pad=15)
axes[1].set_xlabel('Date', fontsize=12, fontweight='bold')
axes[1].set_ylabel('Total Daily ANRs', fontsize=12, fontweight='bold')
axes[1].grid(True, alpha=0.4, linestyle='--')
axes[1].tick_params(axis='x', rotation=45, labelsize=10)
axes[1].tick_params(axis='y', labelsize=10)

# Add mean line
mean_anrs = daily_agg['Daily ANRs'].mean()
axes[1].axhline(y=mean_anrs, color='orange', linestyle='--', 
                linewidth=2, alpha=0.7, label=f'Mean: {mean_anrs:.1f}')
axes[1].legend(loc='upper right', fontsize=10)

plt.tight_layout()
plt.savefig('batch1_time_series_crashes_anrs.png', dpi=300, bbox_inches='tight')
print("\n✓ Time series chart saved: batch1_time_series_crashes_anrs.png")
plt.close()

# Monthly breakdown
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month
df['YearMonth'] = df['Date'].dt.to_period('M')

monthly_summary = df.groupby('YearMonth').agg({
    'Daily Crashes': 'sum',
    'Daily ANRs': 'sum'
}).reset_index()

monthly_summary['YearMonth'] = monthly_summary['YearMonth'].astype(str)

print("\n\nMonthly Summary:")
print(monthly_summary.to_string(index=False))

print("\n" + "="*70)
print("BATCH 1 COMPLETE - Time Series Analysis Done")
print("="*70)
======================================================================
BATCH 1: DATA LOADING & TIME SERIES ANALYSIS
======================================================================

Available columns in dataset:
['Date', 'Package Name', 'Android OS Version', 'Daily Crashes', 'Daily ANRs', 'Month', 'Device']

Identified columns:
  Date column: Date
  Device column: Device
  Required columns: ['Daily ANRs', 'Android OS Version', 'Daily Crashes']

Columns retained for analysis: ['Date', 'Daily ANRs', 'Android OS Version', 'Daily Crashes', 'Device']
Original dataset size: 3,392 records
Cleaned dataset size: 3,391 records

Data date range: 2024-07-05 00:00:00 to 2025-05-11 00:00:00

Sample of cleaned data (first 5 rows):
        Date  Daily ANRs Android OS Version  Daily Crashes  \
0 2025-02-01         8.0        Android 6.0            0.0   
1 2025-02-01         6.0        Android 7.1           15.0   
2 2025-02-01         1.0        Android 8.0            2.0   
3 2025-02-01         6.0        Android 8.1            1.0   
4 2025-02-01         8.0          Android 9           13.0   

                                       Device  
0        TECNO TECNO-KI7 (TECNO SPARK 10 Pro)  
1                    samsung a13 (Galaxy A13)  
2                    HONOR HNVNE-M (HONOR X6)  
3  Lenovo X306X (Lenovo Tab M10 HD (2nd Gen))  
4           TECNO TECNO-KG6P (TECNO SPARK 8T)  

Basic statistics:
       Daily Crashes   Daily ANRs
count     3391.00000  3391.000000
mean         2.04335     2.695960
std          5.03289    13.818733
min          0.00000     0.000000
25%          0.00000     0.000000
50%          1.00000     1.000000
75%          2.00000     1.000000
max         77.00000   185.000000

======================================================================
A. HIGH-LEVEL STABILITY METRICS
======================================================================

1. TIME SERIES ANALYSIS - Daily Crashes and ANRs
----------------------------------------------------------------------

Time Period: 2024-07-05 to 2025-05-11
Total Days: 60

Total Crashes (entire period): 6,929
Total ANRs (entire period): 9,142
Average Daily Crashes: 115.48
Average Daily ANRs: 152.37
Max Daily Crashes: 386 on 2024-07-25
Max Daily ANRs: 368 on 2025-02-22

✓ Time series chart saved: batch1_time_series_crashes_anrs.png


Monthly Summary:
YearMonth  Daily Crashes  Daily ANRs
  2024-07         3421.0      2374.0
  2025-02         3202.0      6543.0
  2025-05          306.0       225.0

======================================================================
BATCH 1 COMPLETE - Time Series Analysis Done
======================================================================
In [ ]:
 
In [4]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Set style
sns.set_style("whitegrid")
plt.rcParams['figure.dpi'] = 300

# Load and prepare data
df = pd.read_csv(r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\Combined_App_Performance.csv")
df.columns = df.columns.str.strip()

# Find date column
date_col = [col for col in df.columns if 'date' in col.lower()][0]

# Keep required columns
df = df[['Daily ANRs', 'Android OS Version', 'Daily Crashes', date_col]].copy()

# Clean data
df[date_col] = pd.to_datetime(df[date_col], errors='coerce')
df = df.rename(columns={date_col: 'Date'})
df['Daily Crashes'] = pd.to_numeric(df['Daily Crashes'], errors='coerce')
df['Daily ANRs'] = pd.to_numeric(df['Daily ANRs'], errors='coerce')
df = df.dropna(subset=['Date', 'Daily Crashes', 'Daily ANRs'])

# Aggregate by date
daily_agg = df.groupby('Date').agg({
    'Daily Crashes': 'sum',
    'Daily ANRs': 'sum'
}).reset_index().sort_values('Date')

# PLOT: Time Series Chart
fig, axes = plt.subplots(2, 1, figsize=(15, 9))

# Crashes over time
axes[0].plot(daily_agg['Date'], daily_agg['Daily Crashes'], 
             marker='o', linewidth=2.5, markersize=5, color='#e74c3c', 
             markerfacecolor='white', markeredgewidth=2)
axes[0].fill_between(daily_agg['Date'], daily_agg['Daily Crashes'], 
                      alpha=0.3, color='#e74c3c')
axes[0].set_title('Daily Crashes Over Time (2024-2025)', 
                  fontsize=15, fontweight='bold', pad=15)
axes[0].set_ylabel('Total Daily Crashes', fontsize=12, fontweight='bold')
axes[0].grid(True, alpha=0.4, linestyle='--')
axes[0].tick_params(axis='x', rotation=45, labelsize=10)
axes[0].tick_params(axis='y', labelsize=10)

# Add mean line
mean_crashes = daily_agg['Daily Crashes'].mean()
axes[0].axhline(y=mean_crashes, color='red', linestyle='--', 
                linewidth=2, alpha=0.7, label=f'Mean: {mean_crashes:.1f}')
axes[0].legend(loc='upper right', fontsize=10)

# ANRs over time
axes[1].plot(daily_agg['Date'], daily_agg['Daily ANRs'], 
             marker='s', linewidth=2.5, markersize=5, color='#f39c12',
             markerfacecolor='white', markeredgewidth=2)
axes[1].fill_between(daily_agg['Date'], daily_agg['Daily ANRs'], 
                      alpha=0.3, color='#f39c12')
axes[1].set_title('Daily ANRs Over Time (2024-2025)', 
                  fontsize=15, fontweight='bold', pad=15)
axes[1].set_xlabel('Date', fontsize=12, fontweight='bold')
axes[1].set_ylabel('Total Daily ANRs', fontsize=12, fontweight='bold')
axes[1].grid(True, alpha=0.4, linestyle='--')
axes[1].tick_params(axis='x', rotation=45, labelsize=10)
axes[1].tick_params(axis='y', labelsize=10)

# Add mean line
mean_anrs = daily_agg['Daily ANRs'].mean()
axes[1].axhline(y=mean_anrs, color='orange', linestyle='--', 
                linewidth=2, alpha=0.7, label=f'Mean: {mean_anrs:.1f}')
axes[1].legend(loc='upper right', fontsize=10)

plt.tight_layout()
plt.savefig('batch1_time_series_crashes_anrs.png', dpi=300, bbox_inches='tight')
plt.show()
print("✓ Chart saved: batch1_time_series_crashes_anrs.png")
No description has been provided for this image
✓ Chart saved: batch1_time_series_crashes_anrs.png
In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# Set style
sns.set_style("whitegrid")
plt.rcParams['figure.dpi'] = 300

print("="*70)
print("BATCH 2: CRASH RATE ANALYSIS BY OS VERSION & DEVICE")
print("="*70)

# Load the data
df = pd.read_csv(r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\Combined_App_Performance.csv")
df.columns = df.columns.str.strip()

# Find columns
date_col = [col for col in df.columns if 'date' in col.lower()][0]
device_col = [col for col in df.columns if 'device' in col.lower()][0] if any('device' in col.lower() for col in df.columns) else None

# Keep only required columns
columns_to_keep = ['Daily ANRs', 'Android OS Version', 'Daily Crashes', date_col]
if device_col:
    columns_to_keep.append(device_col)

df = df[columns_to_keep].copy()

# Clean data
df[date_col] = pd.to_datetime(df[date_col], errors='coerce')
df = df.rename(columns={date_col: 'Date'})
df['Daily Crashes'] = pd.to_numeric(df['Daily Crashes'], errors='coerce')
df['Daily ANRs'] = pd.to_numeric(df['Daily ANRs'], errors='coerce')
df = df.dropna(subset=['Date', 'Daily Crashes', 'Daily ANRs'])

if device_col:
    df = df.rename(columns={device_col: 'Device'})

print(f"Dataset loaded: {len(df):,} records")

# ===========================================================================
# 2. CRASH RATE BY ANDROID OS VERSION
# ===========================================================================

print("\n2. CRASH RATE BY ANDROID OS VERSION")
print("-" * 70)

os_metrics = df.groupby('Android OS Version').agg({
    'Daily Crashes': ['sum', 'mean', 'count'],
    'Daily ANRs': ['sum', 'mean']
}).reset_index()

os_metrics.columns = ['OS_Version', 'Total_Crashes', 'Avg_Crashes', 'Record_Count', 
                      'Total_ANRs', 'Avg_ANRs']

# Calculate crash rate (crashes per record)
os_metrics['Crash_Rate'] = (os_metrics['Total_Crashes'] / os_metrics['Record_Count']) * 100
os_metrics['ANR_Rate'] = (os_metrics['Total_ANRs'] / os_metrics['Record_Count']) * 100

# Sort by crash rate
os_metrics = os_metrics.sort_values('Crash_Rate', ascending=False)

print("\nAndroid OS Version Performance Metrics:")
print(os_metrics.to_string(index=False))

# Visualization - OS Version Crash Rate
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# Crash Rate
bars1 = axes[0].barh(range(len(os_metrics)), os_metrics['Crash_Rate'], 
                     color='#3498db', alpha=0.8, edgecolor='black')
# Highlight top 3 worst
for i in range(min(3, len(bars1))):
    bars1[i].set_color('#e74c3c')
    bars1[i].set_alpha(1.0)

axes[0].set_yticks(range(len(os_metrics)))
axes[0].set_yticklabels(os_metrics['OS_Version'], fontsize=10)
axes[0].set_xlabel('Crash Rate (% of records)', fontsize=12, fontweight='bold')
axes[0].set_title('Crash Rate by Android OS Version', fontsize=13, fontweight='bold')
axes[0].grid(True, alpha=0.3, axis='x')
axes[0].invert_yaxis()

# Add value labels
for i, v in enumerate(os_metrics['Crash_Rate']):
    axes[0].text(v + 0.5, i, f'{v:.1f}%', va='center', fontsize=9, fontweight='bold')

# ANR Rate
bars2 = axes[1].barh(range(len(os_metrics)), os_metrics['ANR_Rate'], 
                     color='#9b59b6', alpha=0.8, edgecolor='black')
# Highlight top 3 worst
for i in range(min(3, len(bars2))):
    bars2[i].set_color('#f39c12')
    bars2[i].set_alpha(1.0)

axes[1].set_yticks(range(len(os_metrics)))
axes[1].set_yticklabels(os_metrics['OS_Version'], fontsize=10)
axes[1].set_xlabel('ANR Rate (% of records)', fontsize=12, fontweight='bold')
axes[1].set_title('ANR Rate by Android OS Version', fontsize=13, fontweight='bold')
axes[1].grid(True, alpha=0.3, axis='x')
axes[1].invert_yaxis()

# Add value labels
for i, v in enumerate(os_metrics['ANR_Rate']):
    axes[1].text(v + 0.5, i, f'{v:.1f}%', va='center', fontsize=9, fontweight='bold')

plt.tight_layout()
plt.savefig('batch2_crash_rate_by_os.png', dpi=300, bbox_inches='tight')
print("\n✓ Chart saved: batch2_crash_rate_by_os.png")
plt.close()

# ===========================================================================
# 3. CRASH RATE BY DEVICE (if Device column exists)
# ===========================================================================

if 'Device' in df.columns:
    print("\n\n3. CRASH RATE BY DEVICE")
    print("-" * 70)
    
    device_metrics = df.groupby('Device').agg({
        'Daily Crashes': ['sum', 'mean', 'count'],
        'Daily ANRs': ['sum', 'mean']
    }).reset_index()
    
    device_metrics.columns = ['Device', 'Total_Crashes', 'Avg_Crashes', 'Record_Count', 
                              'Total_ANRs', 'Avg_ANRs']
    
    device_metrics['Crash_Rate'] = (device_metrics['Total_Crashes'] / device_metrics['Record_Count']) * 100
    device_metrics['ANR_Rate'] = (device_metrics['Total_ANRs'] / device_metrics['Record_Count']) * 100
    
    device_metrics = device_metrics.sort_values('Crash_Rate', ascending=False)
    
    print("\nDevice Performance Metrics:")
    print(device_metrics.to_string(index=False))
    
    # Visualization - Device Crash Rate (Top 15)
    top_devices = device_metrics.head(15)
    
    fig, axes = plt.subplots(1, 2, figsize=(16, 8))
    
    # Crash Rate
    bars1 = axes[0].barh(range(len(top_devices)), top_devices['Crash_Rate'], 
                         color='#3498db', alpha=0.8, edgecolor='black')
    for i in range(min(5, len(bars1))):
        bars1[i].set_color('#e74c3c')
        bars1[i].set_alpha(1.0)
    
    axes[0].set_yticks(range(len(top_devices)))
    axes[0].set_yticklabels(top_devices['Device'], fontsize=9)
    axes[0].set_xlabel('Crash Rate (% of records)', fontsize=12, fontweight='bold')
    axes[0].set_title('Top 15 Devices by Crash Rate', fontsize=13, fontweight='bold')
    axes[0].grid(True, alpha=0.3, axis='x')
    axes[0].invert_yaxis()
    
    # ANR Rate
    bars2 = axes[1].barh(range(len(top_devices)), top_devices['ANR_Rate'], 
                         color='#9b59b6', alpha=0.8, edgecolor='black')
    for i in range(min(5, len(bars2))):
        bars2[i].set_color('#f39c12')
        bars2[i].set_alpha(1.0)
    
    axes[1].set_yticks(range(len(top_devices)))
    axes[1].set_yticklabels(top_devices['Device'], fontsize=9)
    axes[1].set_xlabel('ANR Rate (% of records)', fontsize=12, fontweight='bold')
    axes[1].set_title('Top 15 Devices by ANR Rate', fontsize=13, fontweight='bold')
    axes[1].grid(True, alpha=0.3, axis='x')
    axes[1].invert_yaxis()
    
    plt.tight_layout()
    plt.savefig('batch2_crash_rate_by_device.png', dpi=300, bbox_inches='tight')
    print("\n✓ Chart saved: batch2_crash_rate_by_device.png")
    plt.close()

else:
    print("\n\n3. DEVICE ANALYSIS SKIPPED")
    print("-" * 70)
    print("No Device column found in dataset.")

print("\n" + "="*70)
print("BATCH 2 COMPLETE - Crash Rate Analysis Done")
print("="*70)
======================================================================
BATCH 2: CRASH RATE ANALYSIS BY OS VERSION & DEVICE
======================================================================
Dataset loaded: 3,391 records

2. CRASH RATE BY ANDROID OS VERSION
----------------------------------------------------------------------

Android OS Version Performance Metrics:
 OS_Version  Total_Crashes  Avg_Crashes  Record_Count  Total_ANRs   Avg_ANRs  Crash_Rate     ANR_Rate
 Android 14         1076.0    38.428571            28      4176.0 149.142857 3842.857143 14914.285714
  Android 9          679.0    24.250000            28       182.0   6.500000 2425.000000   650.000000
 Android 13          381.0    13.607143            28       325.0  11.607143 1360.714286  1160.714286
 Android 12          290.0    10.357143            28       562.0  20.071429 1035.714286  2007.142857
 Android 11          238.0     8.500000            28       375.0  13.392857  850.000000  1339.285714
 Android 10          195.0     6.964286            28       330.0  11.785714  696.428571  1178.571429
Android 8.1          135.0     4.821429            28       243.0   8.678571  482.142857   867.857143
Android 7.1           93.0     4.043478            23        64.0   2.782609  404.347826   278.260870
Android 8.0           49.0     2.227273            22        21.0   0.954545  222.727273    95.454545
 Android 15           44.0     1.571429            28       180.0   6.428571  157.142857   642.857143
Android 6.0           16.0     0.761905            21        48.0   2.285714   76.190476   228.571429
Android 7.0            6.0     0.352941            17        35.0   2.058824   35.294118   205.882353
Android 12L            0.0     0.000000             2         2.0   1.000000    0.000000   100.000000

✓ Chart saved: batch2_crash_rate_by_os.png


3. CRASH RATE BY DEVICE
----------------------------------------------------------------------

Device Performance Metrics:
                                                                           Device  Total_Crashes  Avg_Crashes  Record_Count  Total_ANRs   Avg_ANRs  Crash_Rate     ANR_Rate
                                                    samsung r5q (Galaxy S10 Lite)           46.0    46.000000             1       165.0 165.000000 4600.000000 16500.000000
                                                          KDDI SOV35 (Xperia XZs)           36.0    36.000000             1       144.0 144.000000 3600.000000 14400.000000
                                                      Redmi miel (Redmi Note 11S)           63.0    31.500000             2       185.0  92.500000 3150.000000  9250.000000
                                                        Blackview BV9200 (BV9200)           26.0    26.000000             1         8.0   8.000000 2600.000000   800.000000
                                                        Xiaomi plato (Xiaomi 12T)           22.0    22.000000             1       138.0 138.000000 2200.000000 13800.000000
                                            ALLVIEW X8_Soul_Style (X8 Soul Style)           36.0    18.000000             2       138.0  69.000000 1800.000000  6900.000000
                                                       Infinix Infinix-X652A (S5)           17.0    17.000000             1         4.0   4.000000 1700.000000   400.000000
                                                         KDDI SOG02 (Xperia 5 II)           17.0    17.000000             1        18.0  18.000000 1700.000000  1800.000000
                                              samsung gt58wifi (Galaxy Tab A 8.0)           17.0    17.000000             1         6.0   6.000000 1700.000000   600.000000
                                                        HONOR HNCLK-Q (HONOR X7b)           16.0    16.000000             1         1.0   1.000000 1600.000000   100.000000
                                                       motorola cancun (moto g14)           79.0    15.800000             5       154.0  30.800000 1580.000000  3080.000000
                                                 motorola albus (Moto Z (2) Play)           44.0    14.666667             3        16.0   5.333333 1466.666667   533.333333
                                                          POCO vayu (POCO X3 Pro)           66.0    13.200000             5       161.0  32.200000 1320.000000  3220.000000
                                                TECNO TECNO-KG6P (TECNO SPARK 8T)           13.0    13.000000             1         8.0   8.000000 1300.000000   800.000000
                                                         POCO surya (POCO X3 NFC)           37.0    12.333333             3         7.0   2.333333 1233.333333   233.333333
                                                       realme RE5887 (realme C30)           35.0    11.666667             3         0.0   0.000000 1166.666667     0.000000
                                                                vivo 2111 (V2111)           46.0    11.500000             4        49.0  12.250000 1150.000000  1225.000000
                                                       samsung a30s (Galaxy A30s)           46.0    11.500000             4       148.0  37.000000 1150.000000  3700.000000
                                                         samsung a10 (Galaxy A10)           57.0    11.400000             5       157.0  31.400000 1140.000000  3140.000000
                                                                vivo 2027 (V2029)           55.0    11.000000             5       166.0  33.200000 1100.000000  3320.000000
                                                         OPPO OP5353L1 (A57s/A77)           33.0    11.000000             3         2.0   0.666667 1100.000000    66.666667
                                                    Infinix Infinix-X690 (NOTE 7)           11.0    11.000000             1        11.0  11.000000 1100.000000  1100.000000
                                                       realme RE5894 (realme C33)           43.0    10.750000             4       159.0  39.750000 1075.000000  3975.000000
                                                              BLU B130DL (B131DL)           86.0    10.750000             8       323.0  40.375000 1075.000000  4037.500000
                                                         Xiaomi oxygen (MI MAX 2)           10.0    10.000000             1        10.0  10.000000 1000.000000  1000.000000
                                                        Redmi eos (Redmi 10 2022)           19.0     9.500000             2        40.0  20.000000  950.000000  2000.000000
                                                         HONOR HWAUM-Q (Honor 7C)           38.0     9.500000             4        11.0   2.750000  950.000000   275.000000
                                                         realme RMX2020 (RMX2020)           28.0     9.333333             3         6.0   2.000000  933.333333   200.000000
                                                  HONOR HNCLK-M1 (HONOR 90 Smart)            9.0     9.000000             1        13.0  13.000000  900.000000  1300.000000
                                           Itel itel-A551L (itel A27(P17 Pro 4G))            9.0     9.000000             1        23.0  23.000000  900.000000  2300.000000
                                                           PRITOM L8_B02 (L8_B02)            9.0     9.000000             1         7.0   7.000000  900.000000   700.000000
                                                TECNO TECNO-KG5j (TECNO SPARK 8C)           27.0     9.000000             3         3.0   1.000000  900.000000   100.000000
                                                    POCO duchamp (POCO X6 Pro 5G)           52.0     8.666667             6        18.0   3.000000  866.666667   300.000000
                                                                OPPO CPH1823 (F9)           43.0     8.600000             5       138.0  27.600000  860.000000  2760.000000
                                                   samsung b0s (Galaxy S22 Ultra)           17.0     8.500000             2        16.0   8.000000  850.000000   800.000000
                                                     realme RMX3261 (realme C21Y)           25.0     8.333333             3         5.0   1.666667  833.333333   166.666667
                                                         samsung a03 (Galaxy A03)           48.0     8.000000             6        12.0   2.000000  800.000000   200.000000
                                            samsung on7xreflte (Galaxy On7 Prime)           31.0     7.750000             4        14.0   3.500000  775.000000   350.000000
                                                            vivo V2327 (Y200e 5G)           31.0     7.750000             4        41.0  10.250000  775.000000  1025.000000
                                                  samsung j4primelte (Galaxy J4+)           85.0     7.727273            11       163.0  14.818182  772.727273  1481.818182
                                                            Redmi fire (Redmi 12)           98.0     7.538462            13       202.0  15.538462  753.846154  1553.846154
                                              TECNO TECNO-KD7 (TECNO SPARK 5 Pro)           15.0     7.500000             2         4.0   2.000000  750.000000   200.000000
                                                            vivo 1716 (vivo 1716)           45.0     7.500000             6       153.0  25.500000  750.000000  2550.000000
                                                Redmi ruby (Redmi Note 12 Pro 5G)           37.0     7.400000             5         7.0   1.400000  740.000000   140.000000
                                                                 vivo 1727ID (V9)            7.0     7.000000             1         0.0   0.000000  700.000000     0.000000
                                                          OPPO OP4BA5L1 (CPH2113)           14.0     7.000000             2         1.0   0.500000  700.000000    50.000000
                                                   motorola pettyl (moto e5 play)            7.0     7.000000             1        16.0  16.000000  700.000000  1600.000000
                                           HUAWEI HWSTK-HF (HUAWEI Y9 Prime 2019)            7.0     7.000000             1         2.0   2.000000  700.000000   200.000000
                                                        google gm4g_s_sprout (4G)            7.0     7.000000             1         1.0   1.000000  700.000000   100.000000
                                                          Xiaomi pine (Redmi  7A)           14.0     7.000000             2        23.0  11.500000  700.000000  1150.000000
                                                    TECNO TECNO-BF6 (TECNO POP 7)           21.0     7.000000             3        12.0   4.000000  700.000000   400.000000
                                                Redmi emerald (Redmi Note 13 Pro)           13.0     6.500000             2        39.0  19.500000  650.000000  1950.000000
                                                          VGO_TEL NEW_20 (NEW_20)           38.0     6.333333             6       142.0  23.666667  633.333333  2366.666667
                                                     HONOR HWLLD-H (Honor 9 Lite)           62.0     6.200000            10       165.0  16.500000  620.000000  1650.000000
                                                     samsung a15x (Galaxy A15 5G)           43.0     6.142857             7       153.0  21.857143  614.285714  2185.714286
                                                            Itel itel-A661L (A49)            6.0     6.000000             1         0.0   0.000000  600.000000     0.000000
                                                          ATT WTATTRW2 (WTATTRW2)           12.0     6.000000             2        14.0   7.000000  600.000000   700.000000
                                                      Redmi merlin (Redmi Note 9)           36.0     6.000000             6        18.0   3.000000  600.000000   300.000000
                                                    TECNO TECNO-KD6 (SPARK 5 Air)            6.0     6.000000             1         5.0   5.000000  600.000000   500.000000
                                                      motorola hawaiip (moto g22)           29.0     5.800000             5        11.0   2.200000  580.000000   220.000000
                                                      HONOR HNRKY-M1 (HONOR X7a )           29.0     5.800000             5       126.0  25.200000  580.000000  2520.000000
                                                      Redmi topaz (Redmi Note 12)           61.0     5.545455            11        33.0   3.000000  554.545455   300.000000
                                                 Infinix Infinix-X6511B (SMART 6)           11.0     5.500000             2         0.0   0.000000  550.000000     0.000000
                                                                         OP595BL1           11.0     5.500000             2         9.0   4.500000  550.000000   450.000000
                                                                    Infinix-X6831           22.0     5.500000             4       149.0  37.250000  550.000000  3725.000000
                                             motorola maltalsc (moto e(7i) power)           11.0     5.500000             2        13.0   6.500000  550.000000   650.000000
                                                            vivo 1902 (vivo 1902)           27.0     5.400000             5         9.0   1.800000  540.000000   180.000000
                                asus ASUS_A001D_1 (ZenFone Max Plus M2 (ZB634KL))           16.0     5.333333             3        20.0   6.666667  533.333333   666.666667
                                                           OPPO CPH1723 (CPH1723)           16.0     5.333333             3        23.0   7.666667  533.333333   766.666667
                                        samsung j2y18lte (Galaxy Grand Prime Pro)           26.0     5.200000             5        30.0   6.000000  520.000000   600.000000
                                                     samsung j8y18lte (Galaxy J8)           10.0     5.000000             2         5.0   2.500000  500.000000   250.000000
                                                                         sunstone           25.0     5.000000             5        27.0   5.400000  500.000000   540.000000
                                                                       TECNO-BD4j            5.0     5.000000             1         1.0   1.000000  500.000000   100.000000
                                                                  UMIDIGI F1 (F1)            5.0     5.000000             1        15.0  15.000000  500.000000  1500.000000
                                                         samsung a30 (Galaxy A30)           10.0     5.000000             2        10.0   5.000000  500.000000   500.000000
                                              motorola parker (motorola one zoom)            5.0     5.000000             1         0.0   0.000000  500.000000     0.000000
                                               motorola athene_f (Moto G(4) Plus)            5.0     5.000000             1         8.0   8.000000  500.000000   800.000000
                                                HUAWEI HWBAC (HUAWEI nova 2 Plus)            5.0     5.000000             1         0.0   0.000000  500.000000     0.000000
                                                       motorola malta (moto e(7))           64.0     4.923077            13       138.0  10.615385  492.307692  1061.538462
                                               motorola ocean_t (moto g(7) power)           29.0     4.833333             6        16.0   2.666667  483.333333   266.666667
                                                   motorola montana (Moto G (5S))           24.0     4.800000             5        11.0   2.200000  480.000000   220.000000
                                                            POCO citrus (POCO M3)           19.0     4.750000             4         4.0   1.000000  475.000000   100.000000
                                                       motorola sabahl (moto e13)           14.0     4.666667             3        17.0   5.666667  466.666667   566.666667
                                                     motorola hanoip (moto g(60))           28.0     4.666667             6       149.0  24.833333  466.666667  2483.333333
                                                        samsung a6lte (Galaxy A6)           14.0     4.666667             3         0.0   0.000000  466.666667     0.000000
                                                            lge meh15lm (LM-K420)           28.0     4.666667             6        26.0   4.333333  466.666667   433.333333
                                                       motorola java (moto g(20))           23.0     4.600000             5        12.0   2.400000  460.000000   240.000000
                                                           HUAWEI HWINE (nova 3i)           23.0     4.600000             5        12.0   2.400000  460.000000   240.000000
                                                  Infinix Infinix-X657C (SMART 5)           23.0     4.600000             5         3.0   0.600000  460.000000    60.000000
                                                         motorola ali (moto g(6))           32.0     4.571429             7        21.0   3.000000  457.142857   300.000000
                                                              OPPO OP4EFDL1 (A53)           32.0     4.571429             7       135.0  19.285714  457.142857  1928.571429
                                                    FACETEL Q3pro_eea (Q3pro_eea)           18.0     4.500000             4         8.0   2.000000  450.000000   200.000000
                                                            vivo 1904 (vivo 1904)           58.0     4.461538            13        20.0   1.538462  446.153846   153.846154
                                                      Redmi tapas (Redmi Note 12)           58.0     4.142857            14        61.0   4.357143  414.285714   435.714286
                                              Blackview Active8Pro (Active 8 Pro)            4.0     4.000000             1         1.0   1.000000  400.000000   100.000000
                                                        realme RE58B8L1 (RMX3771)            4.0     4.000000             1         0.0   0.000000  400.000000     0.000000
                                                     motorola caprip (moto g(30))           12.0     4.000000             3        15.0   5.000000  400.000000   500.000000
                                                        realme RE50C1 (realme 7i)            4.0     4.000000             1         0.0   0.000000  400.000000     0.000000
                                                             TCL Passat (TCL 30+)            4.0     4.000000             1         0.0   0.000000  400.000000     0.000000
                                                                 vivo V2205 (Y35)           12.0     4.000000             3        13.0   4.333333  400.000000   433.333333
                                                   Infinix Infinix-X662B (HOT 11)            4.0     4.000000             1         0.0   0.000000  400.000000     0.000000
                                                            HUAWEI HWMHA (Mate 9)            4.0     4.000000             1         1.0   1.000000  400.000000   100.000000
                                                         Redmi xun (Redmi Pad SE)            4.0     4.000000             1         2.0   2.000000  400.000000   200.000000
                                               Reeder S19_Max_64GB (S19 Max 64GB)            4.0     4.000000             1         0.0   0.000000  400.000000     0.000000
                                                            vivo 1901 (vivo 1901)            8.0     4.000000             2         4.0   2.000000  400.000000   200.000000
                                                              OPPO OP4C77L1 (A52)            4.0     4.000000             1         1.0   1.000000  400.000000   100.000000
                                                 Redmi curtana (Redmi Note 9 Pro)           20.0     4.000000             5       159.0  31.800000  400.000000  3180.000000
                                                        HONOR HNLLY-Q (HONOR X8b)            4.0     4.000000             1         0.0   0.000000  400.000000     0.000000
                                                     TCL U5A_PLUS_4G (Alcatel 1X)            4.0     4.000000             1         0.0   0.000000  400.000000     0.000000
                                                                    SMART_W84_EEA           12.0     4.000000             3        23.0   7.666667  400.000000   766.666667
                                           HUAWEI HWAGS-Q (HUAWEI MediaPad T3 10)            4.0     4.000000             1         0.0   0.000000  400.000000     0.000000
                                                                            V2251            4.0     4.000000             1        20.0  20.000000  400.000000  2000.000000
                                                TECNO TECNO-KJ5n (TECNO SPARK 20)            4.0     4.000000             1         0.0   0.000000  400.000000     0.000000
                                             TECNO TECNO-KC3 (TECNO CAMON 12 Air)            8.0     4.000000             2        11.0   5.500000  400.000000   550.000000
                                               samsung j2corelte (Galaxy J2 Core)           40.0     4.000000            10        33.0   3.300000  400.000000   330.000000
                                               Infinix Infinix-X6512 (SMART 6 HD)            4.0     4.000000             1         0.0   0.000000  400.000000     0.000000
                                       Multilaser Multilaser_E_2 (Multilaser E 2)            4.0     4.000000             1         1.0   1.000000  400.000000   100.000000
                                                                 vivo V2333 (Y03)           51.0     3.923077            13       171.0  13.153846  392.307692  1315.384615
                                         Infinix Infinix-X6731B (Infinix ZERO 30)           18.0     3.600000             5        19.0   3.800000  360.000000   380.000000
                                                     motorola bali (moto e6 play)           43.0     3.583333            12        53.0   4.416667  358.333333   441.666667
                                                          Redmi earth (Redmi 12C)           32.0     3.555556             9        38.0   4.222222  355.555556   422.222222
                                            Infinix Infinix-X6815C (ZERO 5G 2023)            7.0     3.500000             2        10.0   5.000000  350.000000   500.000000
                                                     motorola payton (Moto X (4))            7.0     3.500000             2         2.0   1.000000  350.000000   100.000000
                                                samsung a01core (Galaxy A01 Core)           14.0     3.500000             4         2.0   0.500000  350.000000    50.000000
                                                                        moonstone            7.0     3.500000             2         0.0   0.000000  350.000000     0.000000
                                                 Infinix Infinix-X688B (HOT Play)           35.0     3.500000            10        13.0   1.300000  350.000000   130.000000
                                             Infinix Infinix-X6710 (NOTE 30 VIP )            7.0     3.500000             2         1.0   0.500000  350.000000    50.000000
                                                 xiaomi lavender (Redmi  Note  7)           14.0     3.500000             4         2.0   0.500000  350.000000    50.000000
                                                     samsung a14x (Galaxy A14 5G)           48.0     3.428571            14       181.0  12.928571  342.857143  1292.857143
                                                        motorola borag (moto e22)           30.0     3.333333             9        22.0   2.444444  333.333333   244.444444
                                                             vivo PD1901 (V1901A)           23.0     3.285714             7         4.0   0.571429  328.571429    57.142857
                                                                OPPO CPH1819 (F7)           13.0     3.250000             4         4.0   1.000000  325.000000   100.000000
                                                          OPPO OP4F25L1 (CPH2159)           13.0     3.250000             4         1.0   0.250000  325.000000    25.000000
                                                        samsung a05m (Galaxy A05)           48.0     3.200000            15        26.0   1.733333  320.000000   173.333333
                                                         samsung a14 (Galaxy A14)           48.0     3.200000            15        36.0   2.400000  320.000000   240.000000
                                                     HONOR HNRMO-Q (HONOR X9a 5G)           16.0     3.200000             5         2.0   0.400000  320.000000    40.000000
                                                    motorola penang (moto g53 5G)           16.0     3.200000             5         1.0   0.200000  320.000000    20.000000
                                                      motorola penangf (moto g13)           19.0     3.166667             6        35.0   5.833333  316.666667   583.333333
                                               samsung j4corelte (Galaxy J4 Core)           37.0     3.083333            12        37.0   3.083333  308.333333   308.333333
                                                      realme RMX2001L1 (realme 6)            3.0     3.000000             1         0.0   0.000000  300.000000     0.000000
                                                     samsung dreamlte (Galaxy S8)            9.0     3.000000             3         0.0   0.000000  300.000000     0.000000
                                                  motorola guamp (moto g(9) play)            6.0     3.000000             2         0.0   0.000000  300.000000     0.000000
                                                      realme RMX2151L1 (realme 7)            3.0     3.000000             1         2.0   2.000000  300.000000   200.000000
                                     motorola troika_sprout (motorola one action)            3.0     3.000000             1         1.0   1.000000  300.000000   100.000000
                                                         lge mlv5 (LG K10 (2017))            3.0     3.000000             1         0.0   0.000000  300.000000     0.000000
                                             motorola astro (motorola one fusion)            3.0     3.000000             1         0.0   0.000000  300.000000     0.000000
                                                      samsung m20lte (Galaxy M20)            3.0     3.000000             1         0.0   0.000000  300.000000     0.000000
                                                  Teclast T40Air_ROW (T40Air_ROW)            3.0     3.000000             1         0.0   0.000000  300.000000     0.000000
                                                            Itel itel-L5002 (A25)            3.0     3.000000             1         0.0   0.000000  300.000000     0.000000
                                                   Infinix Infinix-X626B-LTE (S4)            3.0     3.000000             1        12.0  12.000000  300.000000  1200.000000
                                                       Xiaomi nabu (Xiaomi Pad 5)            6.0     3.000000             2         1.0   0.500000  300.000000    50.000000
                                                     TECNO TECNO-BC1s (POP 4 LTE)            3.0     3.000000             1         0.0   0.000000  300.000000     0.000000
                                           OnePlus OnePlus9Pro (OnePlus 9 Pro 5G)            3.0     3.000000             1         1.0   1.000000  300.000000   100.000000
                                     OnePlus OP5958L1 (OnePlus Nord CE 3 Lite 5G)            6.0     3.000000             2        10.0   5.000000  300.000000   500.000000
                                                          OPPO OP4F4DL1 (CPH2213)            3.0     3.000000             1         0.0   0.000000  300.000000     0.000000
                                                            vivo 1819 (vivo 1819)            3.0     3.000000             1         0.0   0.000000  300.000000     0.000000
                                                          OPPO OP4B83L1 (CPH1907)            3.0     3.000000             1         0.0   0.000000  300.000000     0.000000
                                                         Casper VIA_M30 (VIA M30)            3.0     3.000000             1         0.0   0.000000  300.000000     0.000000
                                                   samsung beyond2q (Galaxy S10+)            3.0     3.000000             1         0.0   0.000000  300.000000     0.000000
                                          Infinix Infinix-X669D (Infinix HOT 30i)           12.0     3.000000             4        37.0   9.250000  300.000000   925.000000
                                                         samsung m13 (Galaxy F13)           23.0     2.875000             8        29.0   3.625000  287.500000   362.500000
                                                          TECNO TECNO-BG6 (TECNO)           40.0     2.857143            14        21.0   1.500000  285.714286   150.000000
                                                    Infinix Infinix-X650C (HOT 8)           17.0     2.833333             6        24.0   4.000000  283.333333   400.000000
                                                         motorola fiji (moto e6s)           28.0     2.800000            10        21.0   2.100000  280.000000   210.000000
                                                            Redmi heat (Redmi 12)           14.0     2.800000             5        30.0   6.000000  280.000000   600.000000
                                                   motorola pokerp (moto e? plus)           11.0     2.750000             4        15.0   3.750000  275.000000   375.000000
                                         Infinix Infinix-X6833B (Infinix NOTE 30)           44.0     2.750000            16        53.0   3.312500  275.000000   331.250000
                                               samsung on7xelte (Galaxy J7 Prime)           22.0     2.750000             8        16.0   2.000000  275.000000   200.000000
                                                            lge mdh15lm (LG K41S)           30.0     2.727273            11        21.0   1.909091  272.727273   190.909091
                                                            vivo 1814 (vivo 1814)           19.0     2.714286             7        11.0   1.571429  271.428571   157.142857
                                                   motorola cancunf (moto g54 5G)           35.0     2.692308            13        28.0   2.153846  269.230769   215.384615
                                                   OPPO OP5AF2L1 (Reno12 F/FS 5G)            8.0     2.666667             3         2.0   0.666667  266.666667    66.666667
                                                         samsung a13 (Galaxy A13)           44.0     2.588235            17        33.0   1.941176  258.823529   194.117647
                                          Infinix Infinix-X6525 (Infinix SMART 8)           36.0     2.571429            14        23.0   1.642857  257.142857   164.285714
                                                           Redmi gust (Redmi 13C)           36.0     2.571429            14        40.0   2.857143  257.142857   285.714286
                                                    samsung gta3xl (Galaxy Tab A)            5.0     2.500000             2         3.0   1.500000  250.000000   150.000000
                                                            Nokia MNT (Nokia C10)            5.0     2.500000             2         1.0   0.500000  250.000000    50.000000
                                                                             maui            5.0     2.500000             2         0.0   0.000000  250.000000     0.000000
                                             Infinix Infinix-X6823 (SMART 6 PLUS)            5.0     2.500000             2         1.0   0.500000  250.000000    50.000000
                                                                  lge dh0lm (K22)            5.0     2.500000             2         1.0   0.500000  250.000000    50.000000
                                                       Redmi sea (Redmi Note 12S)           10.0     2.500000             4        16.0   4.000000  250.000000   400.000000
                                                    Redmi iron (Redmi Note 13 5G)            5.0     2.500000             2         2.0   1.000000  250.000000   100.000000
                                                     realme RED8D1 (realme C21-Y)           10.0     2.500000             4         2.0   0.500000  250.000000    50.000000
                                            samsung gteslteatt (Galaxy Tab E 8.0)            5.0     2.500000             2         1.0   0.500000  250.000000    50.000000
                                                              lge mmh4p (LM-X525)           10.0     2.500000             4         2.0   0.500000  250.000000    50.000000
                                                      Infinix Infinix-X663 (NOTE)            5.0     2.500000             2         1.0   0.500000  250.000000    50.000000
                                                                vivo 2026 (V2043)           27.0     2.454545            11        20.0   1.818182  245.454545   181.818182
                                                     samsung m14x (Galaxy M14 5G)           12.0     2.400000             5        12.0   2.400000  240.000000   240.000000
                                     Infinix Infinix-X6812B (Infinix HOT 11S NFC)           12.0     2.400000             5        12.0   2.400000  240.000000   240.000000
                                                              OPPO OP574FL1 (A58)           12.0     2.400000             5         2.0   0.400000  240.000000    40.000000
                                                           OPPO OP56E8L1 (A98 5G)           12.0     2.400000             5         3.0   0.600000  240.000000    60.000000
                                                         motorola lion (moto g04)           26.0     2.363636            11        20.0   1.818182  236.363636   181.818182
                                       Lenovo X306F (Lenovo Tab M10 HD (2nd Gen))            7.0     2.333333             3         2.0   0.666667  233.333333    66.666667
                                                      realme RE58AB (realme C30s)           14.0     2.333333             6         3.0   0.500000  233.333333    50.000000
                                        Infinix Infinix-X680 (Infinix HOT 9 Play)            7.0     2.333333             3         3.0   1.000000  233.333333   100.000000
                                                            vivo 1727 (vivo 1727)            7.0     2.333333             3         1.0   0.333333  233.333333    33.333333
                                                      Redmi sunny (Redmi Note 10)            9.0     2.250000             4        11.0   2.750000  225.000000   275.000000
                                                    samsung a14xm (Galaxy A14 5G)            9.0     2.250000             4         2.0   0.500000  225.000000    50.000000
                                                       samsung a04e (Galaxy A04e)           31.0     2.214286            14        27.0   1.928571  221.428571   192.857143
                                                        samsung a01q (Galaxy A01)           33.0     2.200000            15         9.0   0.600000  220.000000    60.000000
                                                         samsung a02 (Galaxy A02)           22.0     2.200000            10        10.0   1.000000  220.000000   100.000000
                                                      motorola fogorow (moto g24)           11.0     2.200000             5         2.0   0.400000  220.000000    40.000000
                                                 Vertex Pro_P300_4G (Pro_P300_4G)           13.0     2.166667             6        27.0   4.500000  216.666667   450.000000
                                               Redmi camellian (Redmi Note 10 5G)           13.0     2.166667             6         5.0   0.833333  216.666667    83.333333
                                                motorola aljeter (moto g(6) play)           13.0     2.166667             6         6.0   1.000000  216.666667   100.000000
                                                         samsung a23 (Galaxy A23)           23.0     2.090909            11        24.0   2.181818  209.090909   218.181818
                                                  Redmi sweet (Redmi Note 12 Pro)           27.0     2.076923            13        35.0   2.692308  207.692308   269.230769
                                                       samsung a02q (Galaxy A02s)            8.0     2.000000             4         7.0   1.750000  200.000000   175.000000
                                       Itel itel-A551L-Pro (itel A27(P17 Pro 4G))            6.0     2.000000             3         3.0   1.000000  200.000000   100.000000
                                           Infinix Infinix-X6836 (Infinix HOT 40)            6.0     2.000000             3         1.0   0.333333  200.000000    33.333333
                                                motorola channel (moto g(7) play)           14.0     2.000000             7         3.0   0.428571  200.000000    42.857143
                                                     motorola cypfq (moto g51 5G)            2.0     2.000000             1         1.0   1.000000  200.000000   100.000000
                                              motorola harpia_n (Moto G (4) Play)            6.0     2.000000             3         2.0   0.666667  200.000000    66.666667
                                                                         OP530DL1            4.0     2.000000             2         0.0   0.000000  200.000000     0.000000
                                                   motorola lima (moto g(8) play)            6.0     2.000000             3        10.0   3.333333  200.000000   333.333333
                                                      motorola borago (moto e22i)            2.0     2.000000             1         0.0   0.000000  200.000000     0.000000
                                                  samsung greatlte (Galaxy Note8)            6.0     2.000000             3         1.0   0.333333  200.000000    33.333333
                                             Infinix Infinix-X6526 (SMART 8 Plus)            4.0     2.000000             2         3.0   1.500000  200.000000   150.000000
                                                              Nokia NE1 (Nokia 3)            2.0     2.000000             1         4.0   4.000000  200.000000   400.000000
                                                               OPPO CPH1803 (A3s)           22.0     2.000000            11        24.0   2.181818  200.000000   218.181818
                                                       realme RMX2021 (realme C3)            2.0     2.000000             1         0.0   0.000000  200.000000     0.000000
                                                 samsung gta8wifi (Galaxy Tab A8)           10.0     2.000000             5         1.0   0.200000  200.000000    20.000000
                                                  realme RE879EL1 (realme 9 Pro+)            2.0     2.000000             1         1.0   1.000000  200.000000   100.000000
                                                    realme RED8CDL1 (narzo 50 5G)            2.0     2.000000             1         0.0   0.000000  200.000000     0.000000
                                                         HONOR HWBKK-Q (honor 8C)            6.0     2.000000             3         0.0   0.000000  200.000000     0.000000
                                                               Cat S42G (Cat S42)            2.0     2.000000             1         0.0   0.000000  200.000000     0.000000
                                                          Casper VIA_G3 (VIA_ G3)            4.0     2.000000             2         2.0   1.000000  200.000000   100.000000
                                                        Casper VIA_E30 (VIA_E3_0)            2.0     2.000000             1         1.0   1.000000  200.000000   100.000000
                                                 xiaomi daisy_sprout (Mi A2 Lite)            4.0     2.000000             2         2.0   1.000000  200.000000   100.000000
                                                                          B1550VL            4.0     2.000000             2         8.0   4.000000  200.000000   400.000000
                                                        samsung a52q (Galaxy A52)            2.0     2.000000             1         0.0   0.000000  200.000000     0.000000
                                                         samsung a24 (Galaxy A24)           12.0     2.000000             6         2.0   0.333333  200.000000    33.333333
                                                     realme RMX3195 (realme C25s)            8.0     2.000000             4         9.0   2.250000  200.000000   225.000000
                                                                 vivo V2217 (Y02)            2.0     2.000000             1         0.0   0.000000  200.000000     0.000000
                                                         samsung m12 (Galaxy M12)            6.0     2.000000             3        15.0   5.000000  200.000000   500.000000
                                                               lge mdh35lm (K51S)            6.0     2.000000             3         1.0   0.333333  200.000000    33.333333
                                                         iQOO I2126 (iQOO Z6 Pro)            2.0     2.000000             1         0.0   0.000000  200.000000     0.000000
                                                                 lge L-01K (V30+)            2.0     2.000000             1         0.0   0.000000  200.000000     0.000000
                                                       docomo SO-01K (Xperia XZ1)            2.0     2.000000             1         3.0   3.000000  200.000000   300.000000
                                   google relm_cheets (Intel Braswell Chromebook)            4.0     2.000000             2        14.0   7.000000  200.000000   700.000000
                                                 TECNO TECNO-KF6n (TECNO SPARK 7)            2.0     2.000000             1         0.0   0.000000  200.000000     0.000000
                                                         TECNO TECNO-BF7n (TECNO)            2.0     2.000000             1         0.0   0.000000  200.000000     0.000000
                                                      Xiaomi courbet (Mi 11 Lite)            2.0     2.000000             1         0.0   0.000000  200.000000     0.000000
                                                        TECNO TECNO-LH7n (POVA 5)           12.0     2.000000             6         9.0   1.500000  200.000000   150.000000
                                                                 WIKO W-V770 (T3)            2.0     2.000000             1         0.0   0.000000  200.000000     0.000000
                                             samsung on7xltechn (Galaxy J7 Prime)            2.0     2.000000             1         1.0   1.000000  200.000000   100.000000
                                                           VGOTEL NEW_24 (NEW 24)            2.0     2.000000             1         2.0   2.000000  200.000000   200.000000
                                                     OPPO OP5709L1 (OPPO Reno8 T)            2.0     2.000000             1         0.0   0.000000  200.000000     0.000000
                                                      OPPO OP5AD3L1 (Reno 11 Pro)            4.0     2.000000             2         0.0   0.000000  200.000000     0.000000
                                                                lge cv109 (LG K9)            2.0     2.000000             1         0.0   0.000000  200.000000     0.000000
                                               OPPO OP533FL1 (OPPO Reno7/F21 Pro)            4.0     2.000000             2         2.0   1.000000  200.000000   100.000000
                                                  OPPO OP532FL1 (OPPO Reno7 Z 5G)            2.0     2.000000             1         0.0   0.000000  200.000000     0.000000
                                                       Xiaomi pipa (Xiaomi Pad 6)            2.0     2.000000             1         0.0   0.000000  200.000000     0.000000
                                                            Xiaomi rosy (Redmi 5)            4.0     2.000000             2         0.0   0.000000  200.000000     0.000000
                                                        Xiaomi santoni (Redmi 4X)           14.0     2.000000             7         2.0   0.285714  200.000000    28.571429
                                 Multilaser Multilaser_E_Lite (Multilaser E Lite)            2.0     2.000000             1         0.0   0.000000  200.000000     0.000000
                                                     Itel itel-S681LN (itel S23+)            2.0     2.000000             1         2.0   2.000000  200.000000   200.000000
                                                           Itel itel-W5006X (A17)            4.0     2.000000             2         0.0   0.000000  200.000000     0.000000
                                                             vivo 1723CF (V9 6GB)            6.0     2.000000             3        14.0   4.666667  200.000000   466.666667
                                                            Itel itel-A571W (A37)            2.0     2.000000             1         1.0   1.000000  200.000000   100.000000
                                                            vivo 2015 (vivo 2015)           12.0     2.000000             6        23.0   3.833333  200.000000   383.333333
                                                               vivo 1951 (Z1 Pro)            4.0     2.000000             2         1.0   0.500000  200.000000    50.000000
                                                OnePlus OP5552L1 (OnePlus 10T 5G)            2.0     2.000000             1         0.0   0.000000  200.000000     0.000000
                                                            vivo 1812 (vivo 1812)            8.0     2.000000             4         1.0   0.250000  200.000000    25.000000
                                                            Nokia COS (Nokia C20)            2.0     2.000000             1         1.0   1.000000  200.000000   100.000000
                                                             POCO stone (POCO M5)           12.0     2.000000             6         3.0   0.500000  200.000000    50.000000
                                                   Redmi lancelot (Redmi 9 Prime)            8.0     2.000000             4         6.0   1.500000  200.000000   150.000000
                                                             Redmi ice (Redmi A1)            2.0     2.000000             1         0.0   0.000000  200.000000     0.000000
                                                        Realme RMX1821 (realme 3)            2.0     2.000000             1         0.0   0.000000  200.000000     0.000000
                                           Infinix Infinix-X6817 (Infinix HOT 12)            4.0     2.000000             2         0.0   0.000000  200.000000     0.000000
                                                                 vivo V2247 (Y36)           14.0     2.000000             7        11.0   1.571429  200.000000   157.142857
                                                            vivo V2317 (V30 Lite)            2.0     2.000000             1         0.0   0.000000  200.000000     0.000000
                                                            OPPO OP4F2F (CPH2185)           23.0     1.916667            12         6.0   0.500000  191.666667    50.000000
                                                           vivo 1906 (vivo 1906 )           26.0     1.857143            14        44.0   3.142857  185.714286   314.285714
                                                 samsung a3core (Galaxy A03 Core)           11.0     1.833333             6        10.0   1.666667  183.333333   166.666667
                                                  Infinix Infinix-X665E (HOT 20i)           11.0     1.833333             6         2.0   0.333333  183.333333    33.333333
                                                         samsung a12 (Galaxy A12)           20.0     1.818182            11         9.0   0.818182  181.818182    81.818182
                                                         HONOR HNVNE-M (HONOR X6)            9.0     1.800000             5         1.0   0.200000  180.000000    20.000000
                                            samsung gta7lite (Galaxy Tab A7 Lite)            9.0     1.800000             5         2.0   0.400000  180.000000    40.000000
                                                       motorola river (moto g(7))            9.0     1.800000             5        11.0   2.200000  180.000000   220.000000
                                                     samsung a33x (Galaxy A33 5G)           18.0     1.800000            10         5.0   0.500000  180.000000    50.000000
                                                        realme REE2ADL1 (RMX3710)           16.0     1.777778             9        25.0   2.777778  177.777778   277.777778
                                                          realme RE5C9F (RMX3834)           16.0     1.777778             9         6.0   0.666667  177.777778    66.666667
                                                     Redmi fleur (Redmi Note 11S)            7.0     1.750000             4         5.0   1.250000  175.000000   125.000000
                                                      HUAWEI HWJAT-M (HUAWEI Y6s)           21.0     1.750000            12        29.0   2.416667  175.000000   241.666667
                                                     Redmi ocean (Redmi Note 12S)            7.0     1.750000             4        30.0   7.500000  175.000000   750.000000
                                                motorola dubai (motorola edge 30)            7.0     1.750000             4         4.0   1.000000  175.000000   100.000000
                                                              OPPO OP575DL1 (A18)            7.0     1.750000             4         0.0   0.000000  175.000000     0.000000
                                               samsung on5xelte (Galaxy J5 Prime)           21.0     1.750000            12        34.0   2.833333  175.000000   283.333333
                                                            vivo 1938 (vivo 1938)            7.0     1.750000             4         6.0   1.500000  175.000000   150.000000
                                                     Nokia WVR_sprout (Nokia 2.4)            7.0     1.750000             4         9.0   2.250000  175.000000   225.000000
                                                            vivo 1811 (vivo 1811)           19.0     1.727273            11         6.0   0.545455  172.727273    54.545455
                                                       samsung a03s (Galaxy A03s)           22.0     1.692308            13        21.0   1.615385  169.230769   161.538462
                                                   motorola doha (moto g(8) plus)           15.0     1.666667             9        44.0   4.888889  166.666667   488.888889
                                                 Redmi joyeuse (Redmi Note 9 Pro)            5.0     1.666667             3         0.0   0.000000  166.666667     0.000000
                                                            OPPO OP4F97 (CPH2269)           15.0     1.666667             9        12.0   1.333333  166.666667   133.333333
                                                  ZTE P963F50 (ZTE Blade A5 2020)            5.0     1.666667             3         2.0   0.666667  166.666667    66.666667
                                                        motorola coful (moto g31)           10.0     1.666667             6         3.0   0.500000  166.666667    50.000000
                                                              OPPO OP4C72L1 (A92)            5.0     1.666667             3         2.0   0.666667  166.666667    66.666667
                                                     samsung j5y17lte (Galaxy J5)           10.0     1.666667             6        10.0   1.666667  166.666667   166.666667
                                                       samsung a10s (Galaxy A10s)           18.0     1.636364            11        14.0   1.272727  163.636364   127.272727
                                                               lge mmh4 (LM-X420)           18.0     1.636364            11        14.0   1.272727  163.636364   127.272727
                                                       Realme RMX1941 (realme C2)           18.0     1.636364            11        12.0   1.090909  163.636364   109.090909
                                                                vivo V2310 (Y17s)           21.0     1.615385            13        25.0   1.923077  161.538462   192.307692
                                                   Infinix Infinix-X670 (NOTE 12)            8.0     1.600000             5         1.0   0.200000  160.000000    20.000000
                                          Infinix Infinix-X6515 (Infinix SMART 7)           22.0     1.571429            14        26.0   1.857143  157.142857   185.714286
                                                   HUAWEI HWDUB-Q (Y7 Prime 2019)           23.0     1.533333            15        20.0   1.333333  153.333333   133.333333
                                                         HONOR HWJSN-H (Honor 8X)            6.0     1.500000             4         9.0   2.250000  150.000000   225.000000
                                              samsung a7y18lte (Galaxy A7 (2018))            9.0     1.500000             6        13.0   2.166667  150.000000   216.666667
                                                    TECNO TECNO-LG6n (POVA Neo 2)            3.0     1.500000             2         1.0   0.500000  150.000000    50.000000
                                                            Redmi blue (Redmi A3)            3.0     1.500000             2         1.0   0.500000  150.000000    50.000000
                                                       samsung a20s (Galaxy A20s)            3.0     1.500000             2         1.0   0.500000  150.000000    50.000000
                                                            OPPO OP5303 (CPH2325)            3.0     1.500000             2         1.0   0.500000  150.000000    50.000000
                                                      Redmi spesn (Redmi Note 11)            6.0     1.500000             4        10.0   2.500000  150.000000   250.000000
                                                          OPPO OP4B80L1 (A9 2020)            3.0     1.500000             2        17.0   8.500000  150.000000   850.000000
                                                       realme RE50BF (realme C17)            3.0     1.500000             2         3.0   1.500000  150.000000   150.000000
                                                           OPPO CPH1801 (CPH1801)            3.0     1.500000             2        11.0   5.500000  150.000000   550.000000
                                                      Itel itel-S666LN (itel RS4)            3.0     1.500000             2         1.0   0.500000  150.000000    50.000000
                                                                  GM G301 (GM 20)            3.0     1.500000             2         0.0   0.000000  150.000000     0.000000
                                                   TECNO TECNO-KE5j (SPARK 6 GO )            3.0     1.500000             2         0.0   0.000000  150.000000     0.000000
                                            HUAWEI HWLDN-Q (HUAWEI Y7 Prime 2018)            9.0     1.500000             6        11.0   1.833333  150.000000   183.333333
                                                    motorola bangkk (moto g84 5G)            3.0     1.500000             2         1.0   0.500000  150.000000    50.000000
                                                        realme RMX1911 (realme 5)            3.0     1.500000             2         0.0   0.000000  150.000000     0.000000
                                                                  DOOGEE RK2 (U9)            3.0     1.500000             2         8.0   4.000000  150.000000   400.000000
                                                                OPPO OP5355 (A57)           15.0     1.500000            10         4.0   0.400000  150.000000    40.000000
                                                          TCL Bangkok_TF (TCL A3)            3.0     1.500000             2         6.0   3.000000  150.000000   300.000000
                                                       OPPO OP5A0BL1 (F25 Pro 5G)            6.0     1.500000             4         6.0   1.500000  150.000000   150.000000
                                                          TECNO TECNO-BF7 (TECNO)           21.0     1.500000            14        19.0   1.357143  150.000000   135.714286
                                                       realme RMX2030 (realme 5i)           10.0     1.428571             7         4.0   0.571429  142.857143    57.142857
                                                      realme RMX2185 (realme C11)           20.0     1.428571            14        25.0   1.785714  142.857143   178.571429
                                                   Redmi angelican (Redmi 9C NFC)            7.0     1.400000             5         3.0   0.600000  140.000000    60.000000
                                                        Xiaomi amber (Xiaomi 11T)            7.0     1.400000             5         4.0   0.800000  140.000000    80.000000
                                                            Redmi fog (Redmi 10C)           11.0     1.375000             8        21.0   2.625000  137.500000   262.500000
                                                     xiaomi ginkgo (Redmi Note 8)           11.0     1.375000             8        23.0   2.875000  137.500000   287.500000
                                                           Redmi gale (Redmi 13C)           19.0     1.357143            14        24.0   1.714286  135.714286   171.428571
                                                         HONOR HNTFY-Q (HONOR X8)            4.0     1.333333             3         2.0   0.666667  133.333333    66.666667
                                                         Alldocube T1023 (X game)            4.0     1.333333             3         1.0   0.333333  133.333333    33.333333
                                              motorola deen_sprout (Motorola one)            4.0     1.333333             3         2.0   0.666667  133.333333    66.666667
                                                samsung hero2lte (Galaxy S7 edge)            4.0     1.333333             3         4.0   1.333333  133.333333   133.333333
                                                       Redmi lime (Redmi 9 Power)            4.0     1.333333             3         0.0   0.000000  133.333333     0.000000
                                                     HONOR HNALI-Q (HONOR X9b 5G)            4.0     1.333333             3         2.0   0.666667  133.333333    66.666667
                                              realme RE5C82L1 (realme 12 Pro+ 5G)            4.0     1.333333             3         2.0   0.666667  133.333333    66.666667
                                                      HUAWEI HWRNE (Mate 10 lite)            4.0     1.333333             3         4.0   1.333333  133.333333   133.333333
                                         Infinix Infinix-X6528 (Infinix HOT 40i )            4.0     1.333333             3         2.0   0.666667  133.333333    66.666667
                                                  HUAWEI HWAMN-M (HUAWEI Y5 2019)            8.0     1.333333             6         6.0   1.000000  133.333333   100.000000
                                                    OPPO OP56DBL1 (Reno10 Pro 5G)            4.0     1.333333             3         4.0   1.333333  133.333333   133.333333
                                                      realme RED8F6 (realme C25Y)            4.0     1.333333             3         2.0   0.666667  133.333333    66.666667
                                                       samsung a21s (Galaxy A21s)            8.0     1.333333             6        14.0   2.333333  133.333333   233.333333
                                                       samsung a04s (Galaxy A04s)           12.0     1.333333             9        15.0   1.666667  133.333333   166.666667
                                                         samsung a51 (Galaxy A51)           12.0     1.333333             9         3.0   0.333333  133.333333    33.333333
                                                         samsung a50 (Galaxy A50)            4.0     1.333333             3         4.0   1.333333  133.333333   133.333333
                                                              OPPO OP5AE7L1 (A60)           17.0     1.307692            13         9.0   0.692308  130.769231    69.230769
                                                            vivo 1807 (vivo 1807)            9.0     1.285714             7        12.0   1.714286  128.571429   171.428571
                                                     samsung a54x (Galaxy A54 5G)           14.0     1.272727            11        10.0   0.909091  127.272727    90.909091
                                                            OPPO OP4C7D (CPH2015)           14.0     1.272727            11        11.0   1.000000  127.272727   100.000000
                                                            SG SG706SH (AQUOS R2)            5.0     1.250000             4         2.0   0.500000  125.000000    50.000000
                                                           xiaomi ysl (Redmi  S2)            5.0     1.250000             4        10.0   2.500000  125.000000   250.000000
                                                    xiaomi willow (Redmi Note 8T)            5.0     1.250000             4         8.0   2.000000  125.000000   200.000000
                                                       realme RE58BC (realme C51)           10.0     1.250000             8        13.0   1.625000  125.000000   162.500000
                                                   Redmi merlinnfc (Redmi Note 9)            5.0     1.250000             4         2.0   0.500000  125.000000    50.000000
                                              Redmi garnet (Redmi Note 13 Pro 5G)            5.0     1.250000             4         2.0   0.500000  125.000000    50.000000
                                                     realme RE5C91L1 (realme C67)            5.0     1.250000             4         3.0   0.750000  125.000000    75.000000
                                                            OPPO OP571F (CPH2387)            5.0     1.250000             4         7.0   1.750000  125.000000   175.000000
                                          Infinix Infinix-X669C (Infinix HOT 30i)            5.0     1.250000             4         6.0   1.500000  125.000000   150.000000
                                               motorola evert_nt (moto g(6) plus)            6.0     1.200000             5         8.0   1.600000  120.000000   160.000000
                                                                 reeder M7S (M7S)            6.0     1.200000             5         2.0   0.400000  120.000000    40.000000
                                                       samsung a05s (Galaxy A05s)            6.0     1.200000             5         6.0   1.200000  120.000000   120.000000
                                                            vivo 1808 (vivo 1808)            6.0     1.200000             5         1.0   0.200000  120.000000    20.000000
                                                         Redmi aether (Redmi 12C)            6.0     1.200000             5         0.0   0.000000  120.000000     0.000000
                                                        HONOR HNWDY-M (HONOR X6a)            6.0     1.200000             5         4.0   0.800000  120.000000    80.000000
                                                   HONOR HNCRT-M2 (HONOR 90 Lite)            6.0     1.200000             5         2.0   0.400000  120.000000    40.000000
                                                      realme RE54B4L1 (realme 8i)            7.0     1.166667             6         7.0   1.166667  116.666667   116.666667
                                                            vivo 1726 (vivo 1726)            7.0     1.166667             6         3.0   0.500000  116.666667    50.000000
                                                            OPPO OP4BFB (CPH2083)           15.0     1.153846            13        14.0   1.076923  115.384615   107.692308
                                                                vivo 2120 (V2120)            8.0     1.142857             7         3.0   0.428571  114.285714    42.857143
                                               TECNO TECNO-BG7n (TECNO SPARK 20C)           11.0     1.100000            10         4.0   0.400000  110.000000    40.000000
                                                         samsung a15 (Galaxy A15)           11.0     1.100000            10         7.0   0.700000  110.000000    70.000000
                                                        samsung a12s (Galaxy A12)           11.0     1.100000            10        26.0   2.600000  110.000000   260.000000
                                                          HUAWEI HWANE (P20 lite)           13.0     1.083333            12        15.0   1.250000  108.333333   125.000000
                                                       Redmi spes (Redmi Note 11)           14.0     1.076923            13         5.0   0.384615  107.692308    38.461538
                                                            vivo 1820 (vivo 1820)           15.0     1.071429            14         7.0   0.500000  107.142857    50.000000
                                                     samsung a55x (Galaxy A55 5G)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                        SPC GRAVITY_4 (GRAVITY_4)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                         Redmi yunluo (Redmi Pad)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                               xiaomi violet (Redmi  Note  7 Pro)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                         samsung a21 (Galaxy A21)            3.0     1.000000             3         0.0   0.000000  100.000000     0.000000
                                              TECNO TECNO-LH6n (TECNO POVA Neo 3)            3.0     1.000000             3         1.0   0.333333  100.000000    33.333333
                                                     TECNO TECNO-KA7-GO (SPARK 2)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                                              sky            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                        samsung a20p (Galaxy A20)            1.0     1.000000             1         1.0   1.000000  100.000000   100.000000
                                            OnePlus OP557AL1 (OnePlus Nord 2T 5G)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                  OnePlus OnePlus9 (OnePlus 9 5G)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                            POCO alioth (POCO F3)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                       OnePlus OnePlusNordCE (OnePlus Nord CE 5G)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                    POCO redwood (POCO X5 Pro 5G)            4.0     1.000000             4         4.0   1.000000  100.000000   100.000000
                                                 samsung c9ltechn (Galaxy C9 Pro)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                       samsung d1 (Galaxy Note10)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                              samsung crownqltechn (Galaxy Note9)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                         HONOR HWKSA-M (HONOR 8S)            2.0     1.000000             2         3.0   1.500000  100.000000   150.000000
                                                          KDDI SCV46 (Galaxy A20)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                    samsung a23xq (Galaxy A23 5G)            3.0     1.000000             3         0.0   0.000000  100.000000     0.000000
                                                  motorola nora_8917 (Moto E (5))            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                           motorola sanders_nt (Moto G (5S) Plus)            2.0     1.000000             2         1.0   0.500000  100.000000    50.000000
                                                     motorola fijisc (moto e(6i))            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                         motorola foles (moto z4)            2.0     1.000000             2         0.0   0.000000  100.000000     0.000000
                                         Infinix Infinix-X6815B (Infinix ZERO 5G)            1.0     1.000000             1         2.0   2.000000  100.000000   200.000000
                                                        motorola hawao (moto g42)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                           motorola kyoto (motorola edge 20 lite)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                           OPPO CPH1909 (CPH1909)           14.0     1.000000            14        23.0   1.642857  100.000000   164.285714
                                                 motorola lake_n (moto g(7) plus)            2.0     1.000000             2         1.0   0.500000  100.000000    50.000000
                                                                OPPO OP486C (A1k)            8.0     1.000000             8        10.0   1.250000  100.000000   125.000000
                                                        samsung a70q (Galaxy A70)            2.0     1.000000             2         0.0   0.000000  100.000000     0.000000
                                                                vivo 2126 (V2126)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                       Alcatel Seoul (Alcatel 1B)            3.0     1.000000             3         1.0   0.333333  100.000000    33.333333
                                                TECNO TECNO-KG7 (TECNO SPARK 8 P)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                         asus ASUS_AI2203 (ROG Phone 6D Ultimate)            1.0     1.000000             1         1.0   1.000000  100.000000   100.000000
                                                  TECNO TECNO-LG7n (TECNO POVA 4)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                  TECNO TECNO-LF7n (TECNO POVA 3)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                       TECNO TECNO-LE6 (POVA Neo)            2.0     1.000000             2         0.0   0.000000  100.000000     0.000000
                                             TECNO TECNO-KJ6 (TECNO SPARK 20 Pro)            2.0     1.000000             2         0.0   0.000000  100.000000     0.000000
                                             TECNO TECNO-KI7 (TECNO SPARK 10 Pro)            8.0     1.000000             8        10.0   1.250000  100.000000   125.000000
                                                      Xiaomi olivelite (Redmi 8A)            3.0     1.000000             3         5.0   1.666667  100.000000   166.666667
                                                           Xiaomi olive (Redmi 8)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                                WIKO W-V673 (T10)            2.0     1.000000             2         0.0   0.000000  100.000000     0.000000
                                                   xiaomi ugglite (Redmi Note 5A)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                             xiaomi onc (Redmi 7)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                      samsung r8s (Galaxy S20 FE)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                            Lenovo TB-X505F (Lenovo Tab M10 (HD))            1.0     1.000000             1         9.0   9.000000  100.000000   900.000000
                                                         samsung r0q (Galaxy S22)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                      realme RMX2180 (realme C15)            4.0     1.000000             4         0.0   0.000000  100.000000     0.000000
                                     reeder P13_Blue_Pro_2022 (P13_Blue_Pro_2022)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                      samsung a03su (Galaxy A03s)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                  TECNO TECNO-BC2c (TECNO POP 4 )            1.0     1.000000             1         1.0   1.000000  100.000000   100.000000
                                                            vivo 1907 (vivo 1907)            2.0     1.000000             2         0.0   0.000000  100.000000     0.000000
                                                TECNO TECNO-KI5q (TECNO SPARK 10)            6.0     1.000000             6        18.0   3.000000  100.000000   300.000000
                                               TECNO TECNO-KI5m (TECNO SPARK 10C)            1.0     1.000000             1         1.0   1.000000  100.000000   100.000000
                                                            vivo 1718 (vivo 1718)            3.0     1.000000             3         3.0   1.000000  100.000000   100.000000
                                                      TECNO TECNO-BE8 (POP 6 Pro)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                            TECNO TECNO-CK6n (TECNO CAMON 20 PRO)            2.0     1.000000             2         2.0   1.000000  100.000000   100.000000
                                                     TECNO TECNO-CH7n (CAMON 18P)            1.0     1.000000             1         3.0   3.000000  100.000000   300.000000
                                                     samsung q5q (Galaxy Z Fold5)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                                 vivo 1601 (1601)            2.0     1.000000             2         0.0   0.000000  100.000000     0.000000
                                                            vivo 1714 (vivo 1714)            2.0     1.000000             2         3.0   1.500000  100.000000   150.000000
                                                      TECNO TECNO-CG6j (CAMON 17)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                            vivo V2207 (vivo Y22)            7.0     1.000000             7        14.0   2.000000  100.000000   200.000000
                                                           ELEVATE G62 (LUNA G62)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                         HUAWEI HWFIG-H (FIG-LX1)            2.0     1.000000             2         2.0   1.000000  100.000000   100.000000
         asus ASUS_X01A_1 (ZenFone Max M2 (ZB633KL) (WW) / Max M2 (ZB632KL) (IN))            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                  asus ASUS_X00LD_3 (ZenFone Live Plus (ZB553KL))            1.0     1.000000             1         1.0   1.000000  100.000000   100.000000
                                                  OPPO OP4FA7L1 (OPPO Reno6 Z 5G)            2.0     1.000000             2         0.0   0.000000  100.000000     0.000000
                                                        HUAWEI HWWAS-H (P10 lite)            2.0     1.000000             2         1.0   0.500000  100.000000    50.000000
                                                              TCL Cruze (TCL 30E)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                           Infinix Infinix-X627STU (Smart 3 Plus)            3.0     1.000000             3         2.0   0.666667  100.000000    66.666667
                                                         samsung m04 (Galaxy M04)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                         TCL Civic_Plus (TCL 408)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                           Multilaser Multilaser_F (Multilaser_F)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                            vivo 1915 (vivo 1915)            1.0     1.000000             1         1.0   1.000000  100.000000   100.000000
                                                    Nokia DRSA_sprout (Nokia 3.4)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                            vivo 1919 (vivo 1919)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                   Itel itel-W6503 (itel S16 Pro)            2.0     1.000000             2         0.0   0.000000  100.000000     0.000000
                                                                vivo V2342 (Y100)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                       samsung a13ve (Galaxy A13)            5.0     1.000000             5        13.0   2.600000  100.000000   260.000000
                                                                         Doha_TMO            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                         samsung m21 (Galaxy M21)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                         samsung m32 (Galaxy M32)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                     samsung m53x (Galaxy M53 5G)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                            OPPO OP5325 (CPH2421)            2.0     1.000000             2         0.0   0.000000  100.000000     0.000000
                                                OPPO OP5335L1 (OPPO Reno8 Pro 5G)            1.0     1.000000             1         1.0   1.000000  100.000000   100.000000
                                                          OPPO OP56E1L1 (CPH2455)            2.0     1.000000             2         1.0   0.500000  100.000000    50.000000
                                                       OPPO OP56EDL1 (Reno8 T 5G)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                                OPPO CPH1901 (A7)            9.0     1.000000             9         9.0   1.000000  100.000000   100.000000
                                                          OPPO CPH1859 (realme 1)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                                OPPO OP56F5 (A17)           10.0     1.000000            10        10.0   1.000000  100.000000   100.000000
                                                  samsung dreamqltesq (Galaxy S8)            2.0     1.000000             2         0.0   0.000000  100.000000     0.000000
                                                        OPPO OP5705L1 (Reno10 5G)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                         Xiaomi tiare (Redmi  Go)            5.0     1.000000             5         6.0   1.200000  100.000000   120.000000
                                                 ZTE P963F01 (ZTE Blade V10 Vita)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                                 EVERCOSS M6 (M6)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                   Xiaomi taoyao (Xiaomi 12 Lite)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                           lge mcv7a (LG Stylo 4)            1.0     1.000000             1         1.0   1.000000  100.000000   100.000000
                                                                lge mcv3 (LG K11)            3.0     1.000000             3         5.0   1.666667  100.000000   166.666667
                                                            Itel itel-A571L (A26)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                           samsung gtaxllte (Galaxy Tab A (2016))            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                     samsung gta8 (Galaxy Tab A8)            2.0     1.000000             2         1.0   0.500000  100.000000    50.000000
                                       Infinix Infinix-X6516 (Infinix SMART 7 HD)            2.0     1.000000             2         0.0   0.000000  100.000000     0.000000
                                                 Infinix Infinix-X6511E (SMART 6)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                     Nokia DRS_sprout (Nokia 3.4)            3.0     1.000000             3         2.0   0.666667  100.000000    66.666667
                                                           OPPO CPH1727 (CPH1727)            2.0     1.000000             2         0.0   0.000000  100.000000     0.000000
                                                           OPPO CPH1717 (CPH1717)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                   samsung e3q (Galaxy S24 Ultra)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                        motorola devon (moto g32)            2.0     1.000000             2         5.0   2.500000  100.000000   250.000000
                                       Infinix Infinix-X680B (Infinix HOT 9 Play)            3.0     1.000000             3         2.0   0.666667  100.000000    66.666667
                                               samsung j5y17ltedx (Galaxy J5 Pro)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                              Infinix Infinix-X678B (NOTE 30 Pro)            3.0     1.000000             3         1.0   0.333333  100.000000    33.333333
                                                            HUAWEI HWPAR (nova 3)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                            vivo 1802 (vivo 1802)            4.0     1.000000             4         7.0   1.750000  100.000000   175.000000
                                             HUAWEI HWPOT-H (HUAWEI P smart 2019)            3.0     1.000000             3         0.0   0.000000  100.000000     0.000000
                                                  HUAWEI HWFLA-H (HUAWEI Y9 2018)            2.0     1.000000             2        12.0   6.000000  100.000000   600.000000
                                       Infinix Infinix-X6837 (Infinix HOT 40 Pro)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                 Infinix Infinix-X688C (HOT Play)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                           OPPO OP52F3L1 (A78 5G)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                               lge mmh45lm (K50S)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                                    Infinix-X6816            3.0     1.000000             3         4.0   1.333333  100.000000   133.333333
                                                motorola def (motorola one hyper)            2.0     1.000000             2         2.0   1.000000  100.000000   100.000000
                                                samsung j7xelte (Galaxy J7(2016))            2.0     1.000000             2         3.0   1.500000  100.000000   150.000000
                                                        Redmi XIG03 (Redmi 12 5G)            3.0     1.000000             3         3.0   1.000000  100.000000   100.000000
                                                          xiaomi cereus (Redmi 6)            3.0     1.000000             3         1.0   0.333333  100.000000    33.333333
                                                                 vivo V2236 (Y02)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                          samsung gto (Galaxy Tab A (8.0", 2019))            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                            samsung gts7xllite (Galaxy Tab S7 FE)            1.0     1.000000             1         3.0   3.000000  100.000000   300.000000
                                                                         OP5312L1            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                          OPPO OP4EF3L1 (CPH2095)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                          OPPO OP4B79L1 (A5 2020)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                            OPPO OP4B9EL1 (Reno3)            2.0     1.000000             2         0.0   0.000000  100.000000     0.000000
                                                                OPPO OP4883 (F11)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                               Itel itel-A16-Plus (itel-A16-Plus)            1.0     1.000000             1         3.0   3.000000  100.000000   300.000000
                                          Infinix Infinix-X689B (Infinix HOT 10S)            2.0     1.000000             2         1.0   0.500000  100.000000    50.000000
                                                motorola sofiar (moto g(8) power)            2.0     1.000000             2         1.0   0.500000  100.000000    50.000000
                                                       samsung dm2q (Galaxy S23+)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                                vivo 2037 (V2037)            3.0     1.000000             3         2.0   0.666667  100.000000    66.666667
                                                          Itel itel-A663LC (A05s)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                      Itel itel-A632WM (itel A04)            1.0     1.000000             1         3.0   3.000000  100.000000   300.000000
                                                             positivo T810 (T810)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                       realme RE549C (realme C31)            3.0     1.000000             3         0.0   0.000000  100.000000     0.000000
                                                    Lenovo J606F (Lenovo Tab P11)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                 realme RED8AF (realme narzo 50A)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                              realme RE58A5L1 (realme 10 Pro+ 5G)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                                    Pearl_K3_2021            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                          Redmi galahad (Redmi 9)            6.0     1.000000             6         6.0   1.000000  100.000000   100.000000
                                                              vivo V2248 (Y36 5G)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                                vivo 2109 (V2109)            2.0     1.000000             2         0.0   0.000000  100.000000     0.000000
                                                        HONOR HNANY-Q1 (HONOR X9)            2.0     1.000000             2         0.0   0.000000  100.000000     0.000000
                                                                 vivo 2149 (Y21A)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                       motorola nio (moto g(100))            1.0     1.000000             1         2.0   2.000000  100.000000   200.000000
                                       Infinix Infinix-X668C (Infinix HOT 12 PRO)            3.0     1.000000             3         1.0   0.333333  100.000000    33.333333
                                                                 vivo V2204 (Y16)            6.0     1.000000             6         5.0   0.833333  100.000000    83.333333
                                                      HONOR HNVNE-Q (HONOR X8 5G)            1.0     1.000000             1         1.0   1.000000  100.000000   100.000000
                                                         HONOR HWCOR (Honor Play)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                          Redmi cattail (Redmi 9)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                 Blackview BV6200Pro (BV6200 Pro)            1.0     1.000000             1         4.0   4.000000  100.000000   400.000000
                                                         HONOR HWDUA-M (Honor 7A)           10.0     1.000000            10         9.0   0.900000  100.000000    90.000000
                                                      xiaomi vince (Redmi 5 Plus)            1.0     1.000000             1         0.0   0.000000  100.000000     0.000000
                                                     motorola fogos (moto g34 5G)           11.0     0.916667            12        18.0   1.500000   91.666667   150.000000
                                                                        TECNO-BG7           10.0     0.909091            11         9.0   0.818182   90.909091    81.818182
                                                         samsung a04 (Galaxy A04)            8.0     0.888889             9        23.0   2.555556   88.888889   255.555556
                                                  TECNO TECNO-CK7n (CAMON 20 Pro)            6.0     0.857143             7         7.0   1.000000   85.714286   100.000000
                                                       Redmi dandelion (Redmi 9A)           12.0     0.857143            14        17.0   1.214286   85.714286   121.428571
                                                  ZTE P932F50 (ZTE Blade A3 2020)            6.0     0.857143             7         6.0   0.857143   85.714286    85.714286
                                             Infinix Infinix-X6525B (SMRAT 8 PRO)            5.0     0.833333             6        10.0   1.666667   83.333333   166.666667
                                                           OPPO CPH1853 (CPH1853)           10.0     0.833333            12        22.0   1.833333   83.333333   183.333333
                                                        motorola rhode (moto g52)           10.0     0.833333            12        14.0   1.166667   83.333333   116.666667
                                                        Redmi angelica (Redmi 9C)            5.0     0.833333             6         3.0   0.500000   83.333333    50.000000
                                                  HUAWEI HWJKM-H (HUAWEI Y9 2019)            4.0     0.800000             5         2.0   0.400000   80.000000    40.000000
                                                           OPPO CPH1903 (CPH1903)            8.0     0.800000            10         7.0   0.700000   80.000000    70.000000
                                                                 vivo V2250 (V29)            4.0     0.800000             5         3.0   0.600000   80.000000    60.000000
                                                        samsung a11q (Galaxy A11)            8.0     0.800000            10        19.0   1.900000   80.000000   190.000000
                                                                vivo V2254 (Y02t)            4.0     0.800000             5         3.0   0.600000   80.000000    60.000000
                                                         OPPO RMX1811 (realme C1)            4.0     0.800000             5         5.0   1.000000   80.000000   100.000000
                                                           HONOR HNREA (HONOR 90)            3.0     0.750000             4         1.0   0.250000   75.000000    25.000000
                                                       realme RE58C2 (realme C53)            3.0     0.750000             4         1.0   0.250000   75.000000    25.000000
                                                                 vivo V2249 (Y27)            3.0     0.750000             4         3.0   0.750000   75.000000    75.000000
                                                   Redmi sapphire (Redmi Note 13)            3.0     0.750000             4         5.0   1.250000   75.000000   125.000000
                                                     samsung a34x (Galaxy A34 5G)            6.0     0.750000             8        14.0   1.750000   75.000000   175.000000
                                                   Redmi viva (Redmi Note 11 Pro)            3.0     0.750000             4         9.0   2.250000   75.000000   225.000000
                                                         realme RMX3231 (RMX3231)            6.0     0.750000             8         5.0   0.625000   75.000000    62.500000
                                                     Itel itel-L6502 (Vision1Pro)            3.0     0.750000             4         1.0   0.250000   75.000000    25.000000
                                                    Lenovo X104L (Lenovo Tab E10)            3.0     0.750000             4         7.0   1.750000   75.000000   175.000000
                                                     samsung j7y17lte (Galaxy J7)            5.0     0.714286             7         5.0   0.714286   71.428571    71.428571
                                                         xiaomi cactus (Redmi 6A)            5.0     0.714286             7         7.0   1.000000   71.428571   100.000000
                                           TECNO TECNO-KG5m (TECNO SPARK Go 2022)            2.0     0.666667             3         2.0   0.666667   66.666667    66.666667
                                                      xiaomi mido ( Redmi Note 4)            4.0     0.666667             6         3.0   0.500000   66.666667    50.000000
                                                         samsung a22 (Galaxy A22)            2.0     0.666667             3         1.0   0.333333   66.666667    33.333333
                                                                       TECNO-BD4a            4.0     0.666667             6         7.0   1.166667   66.666667   116.666667
                                                              OPPO OP5745L1 (A78)            4.0     0.666667             6         6.0   1.000000   66.666667   100.000000
                                                         samsung a31 (Galaxy A31)            2.0     0.666667             3         2.0   0.666667   66.666667    66.666667
                                                   samsung r8q (Galaxy S20 FE 5G)            2.0     0.666667             3         1.0   0.333333   66.666667    33.333333
                                                   samsung r9s (Galaxy S21 FE 5G)            2.0     0.666667             3         3.0   1.000000   66.666667   100.000000
                                                     samsung a25x (Galaxy A25 5G)            2.0     0.666667             3         1.0   0.333333   66.666667    33.333333
                                                      HONOR HNNTN (HONOR 50 Lite)            2.0     0.666667             3         3.0   1.000000   66.666667   100.000000
                                                        samsung j6lte (Galaxy J6)            4.0     0.666667             6         7.0   1.166667   66.666667   116.666667
                                                 Redmi begonia (Redmi Note 8 Pro)            2.0     0.666667             3         1.0   0.333333   66.666667    33.333333
                                                   HUAWEI HWMAR (HUAWEI P30 lite)            2.0     0.666667             3         6.0   2.000000   66.666667   200.000000
                                                                vivo 2034 (V2068)            2.0     0.666667             3         7.0   2.333333   66.666667   233.333333
                                         DeutscheTelekom Leopard (T Phone ?2023?)            2.0     0.666667             3         1.0   0.333333   66.666667    33.333333
                                                        samsung j4lte (Galaxy J4)            4.0     0.666667             6        13.0   2.166667   66.666667   216.666667
                                                  motorola evert (moto g(6) plus)            2.0     0.666667             3         3.0   1.000000   66.666667   100.000000
                                                   samsung dream2lte (Galaxy S8+)            2.0     0.666667             3         2.0   0.666667   66.666667    66.666667
                                                        samsung a14m (Galaxy A14)            5.0     0.625000             8         8.0   1.000000   62.500000   100.000000
                                                  Infinix Infinix-X665B (HOT 12i)            3.0     0.600000             5         5.0   1.000000   60.000000   100.000000
                                                     realme RE87BAL1 (realme C35)            3.0     0.600000             5        16.0   3.200000   60.000000   320.000000
                                                           Redmi lemon (Redmi 9T)            3.0     0.600000             5         3.0   0.600000   60.000000    60.000000
                                                  Infinix Infinix-X653C (Smart 4)            3.0     0.600000             5         5.0   1.000000   60.000000   100.000000
                                                 HUAWEI HWATU-QG (HUAWEI Y6 2018)            4.0     0.571429             7         7.0   1.000000   57.142857   100.000000
                                                            vivo 1817 (vivo 1817)            4.0     0.571429             7         8.0   1.142857   57.142857   114.285714
                                                 HUAWEI HWDRA-MG (HUAWEI Y5 lite)            4.0     0.571429             7         3.0   0.428571   57.142857    42.857143
                                                     samsung a53x (Galaxy A53 5G)            5.0     0.555556             9        10.0   1.111111   55.555556   111.111111
                                                    Xiaomi toco (Mi Note 10 Lite)            1.0     0.500000             2         2.0   1.000000   50.000000   100.000000
                                                            HONOR HWSTF (Honor 9)            3.0     0.500000             6        10.0   1.666667   50.000000   166.666667
                                        samsung gta7litewifi (Galaxy Tab A7 Lite)            1.0     0.500000             2         2.0   1.000000   50.000000   100.000000
                                                    samsung a73xq (Galaxy A73 5G)            2.0     0.500000             4         4.0   1.000000   50.000000   100.000000
                                            Infinix Infinix-X6823C (SMART 6 PLUS)            1.0     0.500000             2         2.0   1.000000   50.000000   100.000000
                                                      OPPO RMX1801 (realme 2 Pro)            1.0     0.500000             2         1.0   0.500000   50.000000    50.000000
                                                          Redmi water (Redmi A2+)            1.0     0.500000             2         1.0   0.500000   50.000000    50.000000
                                                     Itel itel-A662LM (itel A60s)            1.0     0.500000             2         2.0   1.000000   50.000000   100.000000
                                                            Itel itel-W6501 (P36)            1.0     0.500000             2         1.0   0.500000   50.000000    50.000000
                                                            Itel itel-A665L (A70)            4.0     0.500000             8        20.0   2.500000   50.000000   250.000000
                                            TECNO TECNO-CH6i (TECNO CAMON 19 Neo)            2.0     0.500000             4         3.0   0.750000   50.000000    75.000000
                                                     samsung t2s (Galaxy S21+ 5G)            1.0     0.500000             2         3.0   1.500000   50.000000   150.000000
                                                 TECNO TECNO-LH8n (POVA 5 Pro 5G)            2.0     0.500000             4        10.0   2.500000   50.000000   250.000000
                                             TECNO TECNO-KH7n (TECNO SPARK 9 Pro)            1.0     0.500000             2         1.0   0.500000   50.000000    50.000000
                                                         realme RMX3063 (RMX3063)            1.0     0.500000             2         1.0   0.500000   50.000000    50.000000
                                                  TECNO TECNO-BB4k (SPARK 4 Lite)            1.0     0.500000             2         1.0   0.500000   50.000000    50.000000
                                                            vivo 1724 (vivo 1724)            2.0     0.500000             4         9.0   2.250000   50.000000   225.000000
                                          samsung gtaxlwifi (Galaxy Tab A (2016))            1.0     0.500000             2         1.0   0.500000   50.000000    50.000000
                                                                lge meh35lm (K52)            1.0     0.500000             2         1.0   0.500000   50.000000    50.000000
                                                Nokia PDA_sprout (Nokia 5.1 Plus)            1.0     0.500000             2         1.0   0.500000   50.000000    50.000000
                                                        motorola vicky (moto g72)            1.0     0.500000             2         4.0   2.000000   50.000000   200.000000
                                                      realme RED8C1L1 (realme 9i)            3.0     0.500000             6         4.0   0.666667   50.000000    66.666667
                                                         samsung a32 (Galaxy A32)            2.0     0.500000             4         2.0   0.500000   50.000000    50.000000
                                               motorola cedric (Moto G (5th Gen))            2.0     0.500000             4         6.0   1.500000   50.000000   150.000000
                                    realme RECE4244 (realme C15 Qualcomm Edition)            1.0     0.500000             2         1.0   0.500000   50.000000    50.000000
                                                      realme RE8DDCL1 (realme 10)            3.0     0.500000             6        11.0   1.833333   50.000000   183.333333
                                                      motorola capri (moto g(10))            1.0     0.500000             2         8.0   4.000000   50.000000   400.000000
                                           TECNO TECNO-LI9 (TECNO POVA 6 Pro 5G )            1.0     0.500000             2         1.0   0.500000   50.000000    50.000000
                                       Lenovo X306X (Lenovo Tab M10 HD (2nd Gen))            1.0     0.500000             2        12.0   6.000000   50.000000   600.000000
                                                     xiaomi tissot_sprout (Mi A1)            1.0     0.500000             2         2.0   1.000000   50.000000   100.000000
                                               xiaomi tulip (Redmi  Note  6  Pro)            1.0     0.500000             2         1.0   0.500000   50.000000    50.000000
                                                TECNO TECNO-KG5k (TECNO SPARK 8C)            1.0     0.500000             2         3.0   1.500000   50.000000   150.000000
                                                    samsung m23xq (Galaxy F23 5G)            2.0     0.500000             4         5.0   1.250000   50.000000   125.000000
                                                   samsung dreamlteks (Galaxy S8)            1.0     0.500000             2         4.0   2.000000   50.000000   200.000000
                                                                     Transformer2            1.0     0.500000             2         1.0   0.500000   50.000000    50.000000
                                                           Xiaomi riva (Redmi 5A)            1.0     0.500000             2         1.0   0.500000   50.000000    50.000000
                                           Infinix Infinix-X669 (Infinix HOT 30i)            3.0     0.500000             6        13.0   2.166667   50.000000   216.666667
                                                     Redmi selene (Redmi 10 2022)            1.0     0.500000             2         1.0   0.500000   50.000000    50.000000
                                                                vivo V2206 (Y22s)            2.0     0.500000             4         4.0   1.000000   50.000000   100.000000
                                                                TCL Rio (TCL 403)            1.0     0.500000             2         2.0   1.000000   50.000000   100.000000
                                                     Nokia DRD_sprout (Nokia 5.4)            1.0     0.500000             2         3.0   1.500000   50.000000   150.000000
                                                            vivo 1723 (vivo 1723)            1.0     0.500000             2         2.0   1.000000   50.000000   100.000000
                                                  Sparx Neo_7_Ultra (Neo 7 Ultra)            1.0     0.500000             2         1.0   0.500000   50.000000    50.000000
                                                         samsung a71 (Galaxy A71)            4.0     0.444444             9         8.0   0.888889   44.444444    88.888889
                                               TECNO TECNO-KI5k (TECNO SPARK 10C)            3.0     0.428571             7        10.0   1.428571   42.857143   142.857143
                                                              OPPO OP5759L1 (A38)            2.0     0.400000             5         3.0   0.600000   40.000000    60.000000
                                     Infinix Infinix-X6835B (Infinix HOT 30 PLAY)            2.0     0.400000             5         4.0   0.800000   40.000000    80.000000
                                                  samsung j6primelte (Galaxy J6+)            2.0     0.400000             5         5.0   1.000000   40.000000   100.000000
                                                 HUAWEI HWMRD-M1 (HUAWEI Y6 2019)            2.0     0.400000             5        11.0   2.200000   40.000000   220.000000
                                                            Nokia PHR (Nokia G50)            1.0     0.333333             3         3.0   1.000000   33.333333   100.000000
                                       Infinix Infinix-X6711 (Infinix NOTE 30 5G)            1.0     0.333333             3         2.0   0.666667   33.333333    66.666667
                                                  samsung a52sxq (Galaxy A52s 5G)            1.0     0.333333             3         3.0   1.000000   33.333333   100.000000
                                                       Itel itel-S665L (itel S23)            1.0     0.333333             3         4.0   1.333333   33.333333   133.333333
                                                        samsung m11q (Galaxy M11)            1.0     0.333333             3         3.0   1.000000   33.333333   100.000000
                                                      realme RMX3191 (realme C25)            1.0     0.333333             3         2.0   0.666667   33.333333    66.666667
                                  TECNO-Mobile TECNO-Mobile-KI5q (TECNO SPARK 10)            1.0     0.333333             3         2.0   0.666667   33.333333    66.666667
                                                    HUAWEI HWYAL (HUAWEI nova 5T)            1.0     0.333333             3         2.0   0.666667   33.333333    66.666667
                                                        realme RE5C94L1 (RMX3998)            1.0     0.333333             3         4.0   1.333333   33.333333   133.333333
                                                  samsung j7velte (Galaxy J7 Neo)            1.0     0.333333             3         3.0   1.000000   33.333333   100.000000
                                                          Xiaomi rolex (Redmi 4A)            2.0     0.333333             6         6.0   1.000000   33.333333   100.000000
                                                           Redmi cloud (Redmi A2)            1.0     0.333333             3         2.0   0.666667   33.333333    66.666667
                                                    Redmi secret (Redmi Note 10S)            2.0     0.333333             6         9.0   1.500000   33.333333   150.000000
                                                    Infinix Infinix-X657B (SMART)            1.0     0.250000             4         4.0   1.000000   25.000000   100.000000
                                                          OPPO OP4F43L1 (F19 Pro)            1.0     0.250000             4         5.0   1.250000   25.000000   125.000000
                                                         motorola namath (Moto C)            1.0     0.250000             4         3.0   0.750000   25.000000    75.000000
                                                   gm gm5plus_s_sprout (GM5 Plus)            1.0     0.250000             4         3.0   0.750000   25.000000    75.000000
                                                         samsung a20 (Galaxy A20)            1.0     0.250000             4         4.0   1.000000   25.000000   100.000000
                                                     samsung a22x (Galaxy A22 5G)            1.0     0.200000             5         8.0   1.600000   20.000000   160.000000
                                                                           BT-840            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                            CUBOT KINGKONGMINI3 (KINGKONG MINI 3)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                               Cricket U380AC (Cricket Debut S2™)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                                 vivo V2231 (V27)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                       xiaomi ugg (Redmi Note 5A)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                samsung a5xelte (Galaxy A5(2016))            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                      samsung a6plte (Galaxy A6+)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                        samsung a72q (Galaxy A72)            0.0     0.000000             3         5.0   1.666667    0.000000   166.666667
                                                    samsung beyond2 (Galaxy S10+)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                     samsung d2s (Galaxy Note10+)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                       samsung a50s (Galaxy A50s)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                               samsung a2corelte (Galaxy A2 Core)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                         HONOR HWBND-H (Honor 7X)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                     realme RMX1971 (realme 5pro)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                      realme RMX2002L1 (realme 6)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                         realme RMX2040 (RMX2040)            0.0     0.000000             1         2.0   2.000000    0.000000   200.000000
                                                      realme RMX2189 (realme C12)            0.0     0.000000             2         5.0   2.500000    0.000000   250.000000
                                                  realme RMX2202L1 (realme GT 5G)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                        samsung g0s (Galaxy S22+)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                        samsung gta4xlvewifi (Galaxy Tab S6 Lite)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                               samsung gta9pwifi (Galaxy Tab A9+)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                       realme RE5C42 (realme C65)            0.0     0.000000             1         2.0   2.000000    0.000000   200.000000
                                                   realme RE5C6CL1 (realme 11 5G)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                          realme RED8D7 (RMX3268)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                             motorola tonga (moto g power (2022))            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                        motorola tundra (motorola edge 30 fusion)            0.0     0.000000             2         2.0   1.000000    0.000000   100.000000
                                        realme RE54ABL1 (realme GT Master Edition            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                 realme RE54CBL1 (realme 9Pro 5G)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                              motorola nairo (motorola one 5G UW)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                    motorola rhodep (moto g82 5G)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                   motorola guam (moto e(7) plus)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                            lge mmh55lm (LG K40S)            0.0     0.000000             2         4.0   2.000000    0.000000   200.000000
                                                             lge mmh6lm (LM-X120)            0.0     0.000000             3         5.0   1.666667    0.000000   166.666667
                                                        motorola aruba (moto e20)            0.0     0.000000             4         4.0   1.000000    0.000000   100.000000
                                                 samsung gts3llte (Galaxy Tab S3)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                     samsung herolte (Galaxy S7 )            0.0     0.000000             2         2.0   1.000000    0.000000   100.000000
                                            samsung j2corey20lte (Galaxy J2 Core)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                                lge mdh40lm (K61)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                               HUAWEI HWAGS2 (HUAWEI MediaPad T5)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                     HUAWEI HWBG2 (MediaPad T3 7)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                            HUAWEI HWDRA-M (HUAWEI Y5 Prime 2018)            0.0     0.000000             2         2.0   1.000000    0.000000   100.000000
asus ASUS_X01BD_2 (ZenFone Max Pro M2 (ZB631KL) (WW) / Max Pro M2 (ZB630KL) (IN))            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                 docomo SO-02J (Xperia X Compact)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                                            frost            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                                   lge elsa (V20)            0.0     0.000000             1         3.0   3.000000    0.000000   300.000000
                                                      lge mdh10xlm (LG Harmony 4)            0.0     0.000000             1         2.0   2.000000    0.000000   200.000000
                                                    Xiaomi renoir (Mi 11 Lite 5G)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                   asus ASUS_X00LD_1 (ZenFone 4 Selfie (ZD553KL))            0.0     0.000000             1         2.0   2.000000    0.000000   200.000000
 asus ASUS_X00T_2 (ZenFone Max Pro M1 (ZB602KL) (WW) / Max Pro M1 (ZB601KL) (IN))            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                     samsung jadelte (Galaxy J7+)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                         samsung m22 (Galaxy M22)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                      samsung m33x (Galaxy Jump2)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                          Teclast P85T (P85T_ROW)            0.0     0.000000             1         6.0   6.000000    0.000000   600.000000
                                                             WIKO W-K130 (Sunny4)            0.0     0.000000             1         3.0   3.000000    0.000000   300.000000
                                                       Xiaomi davinci (Redmi K20)            0.0     0.000000             1         3.0   3.000000    0.000000   300.000000
                                                     Xiaomi laurel_sprout (Mi A3)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                 TECNO TECNO-KJ5 (TECNO SPARK 20)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                     TECNO TECNO-LC7 ( Pouvoir 4)            0.0     0.000000             2         2.0   1.000000    0.000000   100.000000
                                              TECNO TECNO-CK8n (CAMON 20 Pro 5G )            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                 TECNO TECNO-CL6 (TECNO CAMON 30)            0.0     0.000000             1         2.0   2.000000    0.000000   200.000000
                                                TECNO TECNO-KF6p (TECNO SPARK 7T)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                  TECNO TECNO-KG6 (TECNO SPARK 8)            0.0     0.000000             1         2.0   2.000000    0.000000   200.000000
                                                    TECNO TECNO-BC2 (TECNO POP 4)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                   TECNO TECNO-BD2d (TECNO POP 5)            0.0     0.000000             2         4.0   2.000000    0.000000   200.000000
                                                   TECNO TECNO-CD6 (CAMON 15 Air)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                      TECNO TECNO-CH6n (CAMON 18)            0.0     0.000000             1         2.0   2.000000    0.000000   200.000000
                                                samsung p3s (Galaxy S21 Ultra 5G)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                 samsung star2qltesq (Galaxy S9+)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                  samsung starqltechn (Galaxy S9)            0.0     0.000000             1         3.0   3.000000    0.000000   300.000000
                                                 samsung zerolte (Galaxy S6 edge)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                             Redmi zircon (Redmi Note 13 Pro+ 5G)            0.0     0.000000             2         2.0   1.000000    0.000000   100.000000
                                          SKYDevices SKY_PADMaxNAL (SKY PAD10Max)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                          Sony G8231 (Xperia XZs)            0.0     0.000000             1         5.0   5.000000    0.000000   500.000000
                                                           TCL U3A_PLUS_4G (GO5c)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                               Infinix Infinix-X6511 (SMART 5 HD)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                  Redmi sapphiren (Redmi Note 13)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                           Redmi rain (Redmi 10C)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                  Redmi rosemary (Redmi Note 10S)            0.0     0.000000             3         3.0   1.000000    0.000000   100.000000
                                                              HYUNDAI E553 (E553)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                  Redmi light (Redmi 11 Prime 5G)            0.0     0.000000             1         2.0   2.000000    0.000000   200.000000
                                                           POCO xaga (POCO X4 GT)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                       Philco HIT_P10a (HIT P10a)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                         Philco Hit_P12 (Hit P12)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                  POCO chopin (Redmi Note 10 Pro)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                         POCO ingres (POCO F4 GT)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                            POCO marble (POCO F5)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                          OPPO OP571DL1 (CPH2385)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                          OPPO OP5ABFL1 (Reno 11)            0.0     0.000000             2         5.0   2.500000    0.000000   250.000000
                                                          OPPO OP4F11L1 (CPH2219)            0.0     0.000000             3         4.0   1.333333    0.000000   133.333333
                                                          OPPO OP5637L1 (CPH2473)            0.0     0.000000             2         2.0   1.000000    0.000000   100.000000
                                                            OPPO OP4863 (F11 Pro)            0.0     0.000000             1         9.0   9.000000    0.000000   900.000000
                                                  Infinix Infinix-X665C (HOT 20i)            0.0     0.000000             1         2.0   2.000000    0.000000   200.000000
                                                                 OPPO A73t (A73t)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                           OPPO CPH1729 (CPH1729)            0.0     0.000000             4         4.0   1.000000    0.000000   100.000000
                                                           Nokia HKEA (Nokia C22)            0.0     0.000000             1         2.0   2.000000    0.000000   200.000000
                                                          KDDI SOV37 (Xperia XZ2)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                                Lenovo P2a42 (P2)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                            Movitel M8424 (M8424)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                 Nokia DMN (Nokia C2 2nd Edition)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                                 GM G310 (GM 21 )            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                                 GM G312 (GM 22 )            0.0     0.000000             1         2.0   2.000000    0.000000   200.000000
                                                       Itel itel-A666L (itel P55)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                         Itel itel-L6005 (itel Vision1(P36 Play))            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                           Itel itel-W5005P (P15)            0.0     0.000000             1         3.0   3.000000    0.000000   300.000000
                                      Infinix Infinix-X6835 (Infinix HOT 30 PLAY)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                      Infinix Infinix-X6850 (Infinix NOTE 40 Pro)            0.0     0.000000             1         3.0   3.000000    0.000000   300.000000
                                         Infinix Infinix-X6851B (NOTE 40 Pro+ 5G)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                             Infinix Infinix-X687 (Infinix ZERO8)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                      Infinix Infinix-X689F (HOT)            0.0     0.000000             1         8.0   8.000000    0.000000   800.000000
                                                    Infinix Infinix-X692 (NOTE 8)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                     Infinix Infinix-X676C (Infinix NOTE 12 2023)            0.0     0.000000             2         2.0   1.000000    0.000000   100.000000
                                              Infinix Infinix-X6825 (HOT 20 PLAY)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                              vivo V2315 (Y28 5G)            0.0     0.000000             2         8.0   4.000000    0.000000   400.000000
                                                           HONOR HNNTH (HONOR 50)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                                vivo 2036 (V2031)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                                vivo 2110 (V2152)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                                vivo 2116 (V2117)            0.0     0.000000             1         1.0   1.000000    0.000000   100.000000
                                                                vivo 2131 (V2140)            0.0     0.000000             1         2.0   2.000000    0.000000   200.000000

✓ Chart saved: batch2_crash_rate_by_device.png

======================================================================
BATCH 2 COMPLETE - Crash Rate Analysis Done
======================================================================
In [5]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Set style
sns.set_style("whitegrid")
plt.rcParams['figure.dpi'] = 300

# Load and prepare data
df = pd.read_csv(r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\Combined_App_Performance.csv")
df.columns = df.columns.str.strip()

# Find columns
date_col = [col for col in df.columns if 'date' in col.lower()][0]
device_col = [col for col in df.columns if 'device' in col.lower()][0] if any('device' in col.lower() for col in df.columns) else None

# Keep required columns
columns_to_keep = ['Daily ANRs', 'Android OS Version', 'Daily Crashes', date_col]
if device_col:
    columns_to_keep.append(device_col)
df = df[columns_to_keep].copy()

# Clean data
df[date_col] = pd.to_datetime(df[date_col], errors='coerce')
df = df.rename(columns={date_col: 'Date'})
df['Daily Crashes'] = pd.to_numeric(df['Daily Crashes'], errors='coerce')
df['Daily ANRs'] = pd.to_numeric(df['Daily ANRs'], errors='coerce')
df = df.dropna(subset=['Date', 'Daily Crashes', 'Daily ANRs'])
if device_col:
    df = df.rename(columns={device_col: 'Device'})

# Calculate OS metrics
os_metrics = df.groupby('Android OS Version').agg({
    'Daily Crashes': ['sum', 'mean', 'count'],
    'Daily ANRs': ['sum', 'mean']
}).reset_index()
os_metrics.columns = ['OS_Version', 'Total_Crashes', 'Avg_Crashes', 'Record_Count', 
                      'Total_ANRs', 'Avg_ANRs']
os_metrics['Crash_Rate'] = (os_metrics['Total_Crashes'] / os_metrics['Record_Count']) * 100
os_metrics['ANR_Rate'] = (os_metrics['Total_ANRs'] / os_metrics['Record_Count']) * 100
os_metrics = os_metrics.sort_values('Crash_Rate', ascending=False)

# PLOT 1: Crash Rate by OS Version
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# Crash Rate
bars1 = axes[0].barh(range(len(os_metrics)), os_metrics['Crash_Rate'], 
                     color='#3498db', alpha=0.8, edgecolor='black')
for i in range(min(3, len(bars1))):
    bars1[i].set_color('#e74c3c')
    bars1[i].set_alpha(1.0)

axes[0].set_yticks(range(len(os_metrics)))
axes[0].set_yticklabels(os_metrics['OS_Version'], fontsize=10)
axes[0].set_xlabel('Crash Rate (% of records)', fontsize=12, fontweight='bold')
axes[0].set_title('Crash Rate by Android OS Version', fontsize=13, fontweight='bold')
axes[0].grid(True, alpha=0.3, axis='x')
axes[0].invert_yaxis()

for i, v in enumerate(os_metrics['Crash_Rate']):
    axes[0].text(v + 0.5, i, f'{v:.1f}%', va='center', fontsize=9, fontweight='bold')

# ANR Rate
bars2 = axes[1].barh(range(len(os_metrics)), os_metrics['ANR_Rate'], 
                     color='#9b59b6', alpha=0.8, edgecolor='black')
for i in range(min(3, len(bars2))):
    bars2[i].set_color('#f39c12')
    bars2[i].set_alpha(1.0)

axes[1].set_yticks(range(len(os_metrics)))
axes[1].set_yticklabels(os_metrics['OS_Version'], fontsize=10)
axes[1].set_xlabel('ANR Rate (% of records)', fontsize=12, fontweight='bold')
axes[1].set_title('ANR Rate by Android OS Version', fontsize=13, fontweight='bold')
axes[1].grid(True, alpha=0.3, axis='x')
axes[1].invert_yaxis()

for i, v in enumerate(os_metrics['ANR_Rate']):
    axes[1].text(v + 0.5, i, f'{v:.1f}%', va='center', fontsize=9, fontweight='bold')

plt.tight_layout()
plt.savefig('batch2_crash_rate_by_os.png', dpi=300, bbox_inches='tight')
plt.show()
print("✓ Chart saved: batch2_crash_rate_by_os.png")

# PLOT 2: Crash Rate by Device (if available)
if 'Device' in df.columns:
    device_metrics = df.groupby('Device').agg({
        'Daily Crashes': ['sum', 'mean', 'count'],
        'Daily ANRs': ['sum', 'mean']
    }).reset_index()
    device_metrics.columns = ['Device', 'Total_Crashes', 'Avg_Crashes', 'Record_Count', 
                              'Total_ANRs', 'Avg_ANRs']
    device_metrics['Crash_Rate'] = (device_metrics['Total_Crashes'] / device_metrics['Record_Count']) * 100
    device_metrics['ANR_Rate'] = (device_metrics['Total_ANRs'] / device_metrics['Record_Count']) * 100
    device_metrics = device_metrics.sort_values('Crash_Rate', ascending=False)
    
    top_devices = device_metrics.head(15)
    
    fig, axes = plt.subplots(1, 2, figsize=(16, 8))
    
    # Crash Rate
    bars1 = axes[0].barh(range(len(top_devices)), top_devices['Crash_Rate'], 
                         color='#3498db', alpha=0.8, edgecolor='black')
    for i in range(min(5, len(bars1))):
        bars1[i].set_color('#e74c3c')
        bars1[i].set_alpha(1.0)
    
    axes[0].set_yticks(range(len(top_devices)))
    axes[0].set_yticklabels(top_devices['Device'], fontsize=9)
    axes[0].set_xlabel('Crash Rate (% of records)', fontsize=12, fontweight='bold')
    axes[0].set_title('Top 15 Devices by Crash Rate', fontsize=13, fontweight='bold')
    axes[0].grid(True, alpha=0.3, axis='x')
    axes[0].invert_yaxis()
    
    # ANR Rate
    bars2 = axes[1].barh(range(len(top_devices)), top_devices['ANR_Rate'], 
                         color='#9b59b6', alpha=0.8, edgecolor='black')
    for i in range(min(5, len(bars2))):
        bars2[i].set_color('#f39c12')
        bars2[i].set_alpha(1.0)
    
    axes[1].set_yticks(range(len(top_devices)))
    axes[1].set_yticklabels(top_devices['Device'], fontsize=9)
    axes[1].set_xlabel('ANR Rate (% of records)', fontsize=12, fontweight='bold')
    axes[1].set_title('Top 15 Devices by ANR Rate', fontsize=13, fontweight='bold')
    axes[1].grid(True, alpha=0.3, axis='x')
    axes[1].invert_yaxis()
    
    plt.tight_layout()
    plt.savefig('batch2_crash_rate_by_device.png', dpi=300, bbox_inches='tight')
    plt.show()
    print("✓ Chart saved: batch2_crash_rate_by_device.png")
No description has been provided for this image
✓ Chart saved: batch2_crash_rate_by_os.png
No description has been provided for this image
✓ Chart saved: batch2_crash_rate_by_device.png
In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# Set style
sns.set_style("whitegrid")
plt.rcParams['figure.dpi'] = 300

print("="*70)
print("BATCH 3: TOP OFFENDING OS VERSIONS & DEVICES")
print("="*70)

# Load the data
df = pd.read_csv(r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\Combined_App_Performance.csv")
df.columns = df.columns.str.strip()

# Find columns
date_col = [col for col in df.columns if 'date' in col.lower()][0]
device_col = [col for col in df.columns if 'device' in col.lower()][0] if any('device' in col.lower() for col in df.columns) else None

# Keep only required columns
columns_to_keep = ['Daily ANRs', 'Android OS Version', 'Daily Crashes', date_col]
if device_col:
    columns_to_keep.append(device_col)

df = df[columns_to_keep].copy()

# Clean data
df[date_col] = pd.to_datetime(df[date_col], errors='coerce')
df = df.rename(columns={date_col: 'Date'})
df['Daily Crashes'] = pd.to_numeric(df['Daily Crashes'], errors='coerce')
df['Daily ANRs'] = pd.to_numeric(df['Daily ANRs'], errors='coerce')
df = df.dropna(subset=['Date', 'Daily Crashes', 'Daily ANRs'])

if device_col:
    df = df.rename(columns={device_col: 'Device'})

print(f"Dataset loaded: {len(df):,} records")

# ===========================================================================
# 4. TOP OFFENDING ANDROID OS VERSIONS
# ===========================================================================

print("\n4. TOP OFFENDING ANDROID OS VERSIONS")
print("-" * 70)

os_detailed = df.groupby('Android OS Version').agg({
    'Daily Crashes': ['sum', 'mean', 'count'],
    'Daily ANRs': ['sum', 'mean']
}).reset_index()

os_detailed.columns = ['OS_Version', 'Total_Crashes', 'Avg_Crashes', 'Record_Count', 
                       'Total_ANRs', 'Avg_ANRs']

os_detailed['Crash_Rate'] = (os_detailed['Total_Crashes'] / os_detailed['Record_Count']) * 100

# A. Top by ABSOLUTE crash count
top_absolute_os = os_detailed.nlargest(10, 'Total_Crashes')

print("\n TOP 10 OS VERSIONS BY ABSOLUTE CRASH COUNT:")
print(top_absolute_os[['OS_Version', 'Total_Crashes', 'Total_ANRs', 'Record_Count', 'Crash_Rate']].to_string(index=False))

# B. Top by RELATIVE crash rate (minimum 10 records)
top_relative_os = os_detailed[os_detailed['Record_Count'] >= 10].nlargest(10, 'Crash_Rate')

print("\n\n TOP 10 OS VERSIONS BY RELATIVE CRASH RATE (min 10 records):")
print(top_relative_os[['OS_Version', 'Crash_Rate', 'Total_Crashes', 'Record_Count']].to_string(index=False))

# Visualization - Top Offending OS Versions
fig, axes = plt.subplots(1, 2, figsize=(16, 8))

# Absolute crashes
colors_abs = ['#e74c3c' if i < 3 else '#3498db' for i in range(len(top_absolute_os))]
bars1 = axes[0].barh(range(len(top_absolute_os)), top_absolute_os['Total_Crashes'], 
                     color=colors_abs, alpha=0.8, edgecolor='black')
axes[0].set_yticks(range(len(top_absolute_os)))
axes[0].set_yticklabels(top_absolute_os['OS_Version'], fontsize=10)
axes[0].set_xlabel('Total Crashes', fontsize=12, fontweight='bold')
axes[0].set_title('Top 10 OS: Highest Absolute Crashes', fontsize=13, fontweight='bold')
axes[0].invert_yaxis()
axes[0].grid(True, alpha=0.3, axis='x')

# Add value labels
for i, v in enumerate(top_absolute_os['Total_Crashes']):
    axes[0].text(v + (v*0.02), i, f'{v:,.0f}', va='center', fontsize=9, fontweight='bold')

# Relative crash rate
colors_rel = ['#e74c3c' if i < 3 else '#f39c12' for i in range(len(top_relative_os))]
bars2 = axes[1].barh(range(len(top_relative_os)), top_relative_os['Crash_Rate'], 
                     color=colors_rel, alpha=0.8, edgecolor='black')
axes[1].set_yticks(range(len(top_relative_os)))
axes[1].set_yticklabels(top_relative_os['OS_Version'], fontsize=10)
axes[1].set_xlabel('Crash Rate (%)', fontsize=12, fontweight='bold')
axes[1].set_title('Top 10 OS: Highest Relative Crash Rate', fontsize=13, fontweight='bold')
axes[1].invert_yaxis()
axes[1].grid(True, alpha=0.3, axis='x')

# Add value labels
for i, v in enumerate(top_relative_os['Crash_Rate']):
    axes[1].text(v + (v*0.02), i, f'{v:.1f}%', va='center', fontsize=9, fontweight='bold')

plt.tight_layout()
plt.savefig('batch3_top_offending_os.png', dpi=300, bbox_inches='tight')
print("\n✓ Chart saved: batch3_top_offending_os.png")
plt.close()

# ===========================================================================
# 5. TOP OFFENDING DEVICES (if Device column exists)
# ===========================================================================

if 'Device' in df.columns:
    print("\n\n5. TOP OFFENDING DEVICES")
    print("-" * 70)
    
    device_detailed = df.groupby('Device').agg({
        'Daily Crashes': ['sum', 'mean', 'count'],
        'Daily ANRs': ['sum', 'mean']
    }).reset_index()
    
    device_detailed.columns = ['Device', 'Total_Crashes', 'Avg_Crashes', 'Record_Count', 
                               'Total_ANRs', 'Avg_ANRs']
    
    device_detailed['Crash_Rate'] = (device_detailed['Total_Crashes'] / device_detailed['Record_Count']) * 100
    
    # A. Top by ABSOLUTE crash count
    top_absolute_device = device_detailed.nlargest(15, 'Total_Crashes')
    
    print("\n📊 TOP 15 DEVICES BY ABSOLUTE CRASH COUNT:")
    print(top_absolute_device[['Device', 'Total_Crashes', 'Total_ANRs', 'Record_Count', 'Crash_Rate']].to_string(index=False))
    
    # B. Top by RELATIVE crash rate (minimum 10 records)
    top_relative_device = device_detailed[device_detailed['Record_Count'] >= 10].nlargest(15, 'Crash_Rate')
    
    print("\n\n📊 TOP 15 DEVICES BY RELATIVE CRASH RATE (min 10 records):")
    print(top_relative_device[['Device', 'Crash_Rate', 'Total_Crashes', 'Record_Count']].to_string(index=False))
    
    # Visualization - Top Offending Devices
    fig, axes = plt.subplots(1, 2, figsize=(16, 10))
    
    # Absolute crashes
    colors_abs = ['#e74c3c' if i < 5 else '#3498db' for i in range(len(top_absolute_device))]
    bars1 = axes[0].barh(range(len(top_absolute_device)), top_absolute_device['Total_Crashes'], 
                         color=colors_abs, alpha=0.8, edgecolor='black')
    axes[0].set_yticks(range(len(top_absolute_device)))
    axes[0].set_yticklabels(top_absolute_device['Device'], fontsize=9)
    axes[0].set_xlabel('Total Crashes', fontsize=12, fontweight='bold')
    axes[0].set_title('Top 15 Devices: Highest Absolute Crashes', fontsize=13, fontweight='bold')
    axes[0].invert_yaxis()
    axes[0].grid(True, alpha=0.3, axis='x')
    
    # Add value labels
    for i, v in enumerate(top_absolute_device['Total_Crashes']):
        axes[0].text(v + (v*0.02), i, f'{v:,.0f}', va='center', fontsize=8, fontweight='bold')
    
    # Relative crash rate
    colors_rel = ['#e74c3c' if i < 5 else '#f39c12' for i in range(len(top_relative_device))]
    bars2 = axes[1].barh(range(len(top_relative_device)), top_relative_device['Crash_Rate'], 
                         color=colors_rel, alpha=0.8, edgecolor='black')
    axes[1].set_yticks(range(len(top_relative_device)))
    axes[1].set_yticklabels(top_relative_device['Device'], fontsize=9)
    axes[1].set_xlabel('Crash Rate (%)', fontsize=12, fontweight='bold')
    axes[1].set_title('Top 15 Devices: Highest Relative Crash Rate', fontsize=13, fontweight='bold')
    axes[1].invert_yaxis()
    axes[1].grid(True, alpha=0.3, axis='x')
    
    # Add value labels
    for i, v in enumerate(top_relative_device['Crash_Rate']):
        axes[1].text(v + (v*0.02), i, f'{v:.1f}%', va='center', fontsize=8, fontweight='bold')
    
    plt.tight_layout()
    plt.savefig('batch3_top_offending_devices.png', dpi=300, bbox_inches='tight')
    print("\n✓ Chart saved: batch3_top_offending_devices.png")
    plt.close()

else:
    print("\n\n5. DEVICE ANALYSIS SKIPPED")
    print("-" * 70)
    print("No Device column found in dataset.")

# Summary of top offenders
print("\n\n" + "="*70)
print("SUMMARY: KEY OFFENDERS IDENTIFIED")
print("="*70)

print(f"\n WORST OS VERSION (Absolute): {top_absolute_os.iloc[0]['OS_Version']}")
print(f"   Total Crashes: {top_absolute_os.iloc[0]['Total_Crashes']:,.0f}")
print(f"   Crash Rate: {top_absolute_os.iloc[0]['Crash_Rate']:.2f}%")

print(f"\n WORST OS VERSION (Relative): {top_relative_os.iloc[0]['OS_Version']}")
print(f"   Crash Rate: {top_relative_os.iloc[0]['Crash_Rate']:.2f}%")
print(f"   Total Crashes: {top_relative_os.iloc[0]['Total_Crashes']:,.0f}")

if 'Device' in df.columns:
    print(f"\n WORST DEVICE (Absolute): {top_absolute_device.iloc[0]['Device']}")
    print(f"   Total Crashes: {top_absolute_device.iloc[0]['Total_Crashes']:,.0f}")
======================================================================
BATCH 3: TOP OFFENDING OS VERSIONS & DEVICES
======================================================================
Dataset loaded: 3,391 records

4. TOP OFFENDING ANDROID OS VERSIONS
----------------------------------------------------------------------

 TOP 10 OS VERSIONS BY ABSOLUTE CRASH COUNT:
 OS_Version  Total_Crashes  Total_ANRs  Record_Count  Crash_Rate
 Android 14         1076.0      4176.0            28 3842.857143
  Android 9          679.0       182.0            28 2425.000000
 Android 13          381.0       325.0            28 1360.714286
 Android 12          290.0       562.0            28 1035.714286
 Android 11          238.0       375.0            28  850.000000
 Android 10          195.0       330.0            28  696.428571
Android 8.1          135.0       243.0            28  482.142857
Android 7.1           93.0        64.0            23  404.347826
Android 8.0           49.0        21.0            22  222.727273
 Android 15           44.0       180.0            28  157.142857


 TOP 10 OS VERSIONS BY RELATIVE CRASH RATE (min 10 records):
 OS_Version  Crash_Rate  Total_Crashes  Record_Count
 Android 14 3842.857143         1076.0            28
  Android 9 2425.000000          679.0            28
 Android 13 1360.714286          381.0            28
 Android 12 1035.714286          290.0            28
 Android 11  850.000000          238.0            28
 Android 10  696.428571          195.0            28
Android 8.1  482.142857          135.0            28
Android 7.1  404.347826           93.0            23
Android 8.0  222.727273           49.0            22
 Android 15  157.142857           44.0            28

✓ Chart saved: batch3_top_offending_os.png


5. TOP OFFENDING DEVICES
----------------------------------------------------------------------

📊 TOP 15 DEVICES BY ABSOLUTE CRASH COUNT:
                         Device  Total_Crashes  Total_ANRs  Record_Count  Crash_Rate
          Redmi fire (Redmi 12)           98.0       202.0            13  753.846154
            BLU B130DL (B131DL)           86.0       323.0             8 1075.000000
samsung j4primelte (Galaxy J4+)           85.0       163.0            11  772.727273
     motorola cancun (moto g14)           79.0       154.0             5 1580.000000
        POCO vayu (POCO X3 Pro)           66.0       161.0             5 1320.000000
     motorola malta (moto e(7))           64.0       138.0            13  492.307692
    Redmi miel (Redmi Note 11S)           63.0       185.0             2 3150.000000
   HONOR HWLLD-H (Honor 9 Lite)           62.0       165.0            10  620.000000
    Redmi topaz (Redmi Note 12)           61.0        33.0            11  554.545455
    Redmi tapas (Redmi Note 12)           58.0        61.0            14  414.285714
          vivo 1904 (vivo 1904)           58.0        20.0            13  446.153846
       samsung a10 (Galaxy A10)           57.0       157.0             5 1140.000000
              vivo 2027 (V2029)           55.0       166.0             5 1100.000000
  POCO duchamp (POCO X6 Pro 5G)           52.0        18.0             6  866.666667
               vivo V2333 (Y03)           51.0       171.0            13  392.307692


📊 TOP 15 DEVICES BY RELATIVE CRASH RATE (min 10 records):
                            Device  Crash_Rate  Total_Crashes  Record_Count
   samsung j4primelte (Galaxy J4+)  772.727273           85.0            11
             Redmi fire (Redmi 12)  753.846154           98.0            13
      HONOR HWLLD-H (Honor 9 Lite)  620.000000           62.0            10
       Redmi topaz (Redmi Note 12)  554.545455           61.0            11
        motorola malta (moto e(7))  492.307692           64.0            13
             vivo 1904 (vivo 1904)  446.153846           58.0            13
       Redmi tapas (Redmi Note 12)  414.285714           58.0            14
samsung j2corelte (Galaxy J2 Core)  400.000000           40.0            10
                  vivo V2333 (Y03)  392.307692           51.0            13
      motorola bali (moto e6 play)  358.333333           43.0            12
  Infinix Infinix-X688B (HOT Play)  350.000000           35.0            10
      samsung a14x (Galaxy A14 5G)  342.857143           48.0            14
         samsung a05m (Galaxy A05)  320.000000           48.0            15
          samsung a14 (Galaxy A14)  320.000000           48.0            15
samsung j4corelte (Galaxy J4 Core)  308.333333           37.0            12

✓ Chart saved: batch3_top_offending_devices.png


======================================================================
SUMMARY: KEY OFFENDERS IDENTIFIED
======================================================================

 WORST OS VERSION (Absolute): Android 14
   Total Crashes: 1,076
   Crash Rate: 3842.86%

 WORST OS VERSION (Relative): Android 14
   Crash Rate: 3842.86%
   Total Crashes: 1,076

 WORST DEVICE (Absolute): Redmi fire (Redmi 12)
   Total Crashes: 98
In [6]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Set style
sns.set_style("whitegrid")
plt.rcParams['figure.dpi'] = 300

# Load and prepare data
df = pd.read_csv(r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\Combined_App_Performance.csv")
df.columns = df.columns.str.strip()

# Find columns
date_col = [col for col in df.columns if 'date' in col.lower()][0]
device_col = [col for col in df.columns if 'device' in col.lower()][0] if any('device' in col.lower() for col in df.columns) else None

# Keep required columns
columns_to_keep = ['Daily ANRs', 'Android OS Version', 'Daily Crashes', date_col]
if device_col:
    columns_to_keep.append(device_col)
df = df[columns_to_keep].copy()

# Clean data
df[date_col] = pd.to_datetime(df[date_col], errors='coerce')
df = df.rename(columns={date_col: 'Date'})
df['Daily Crashes'] = pd.to_numeric(df['Daily Crashes'], errors='coerce')
df['Daily ANRs'] = pd.to_numeric(df['Daily ANRs'], errors='coerce')
df = df.dropna(subset=['Date', 'Daily Crashes', 'Daily ANRs'])
if device_col:
    df = df.rename(columns={device_col: 'Device'})

# Calculate OS detailed metrics
os_detailed = df.groupby('Android OS Version').agg({
    'Daily Crashes': ['sum', 'mean', 'count'],
    'Daily ANRs': ['sum', 'mean']
}).reset_index()
os_detailed.columns = ['OS_Version', 'Total_Crashes', 'Avg_Crashes', 'Record_Count', 
                       'Total_ANRs', 'Avg_ANRs']
os_detailed['Crash_Rate'] = (os_detailed['Total_Crashes'] / os_detailed['Record_Count']) * 100

# Top OS by absolute and relative
top_absolute_os = os_detailed.nlargest(10, 'Total_Crashes')
top_relative_os = os_detailed[os_detailed['Record_Count'] >= 10].nlargest(10, 'Crash_Rate')

# PLOT 1: Top Offending OS Versions
fig, axes = plt.subplots(1, 2, figsize=(16, 8))

# Absolute crashes
colors_abs = ['#e74c3c' if i < 3 else '#3498db' for i in range(len(top_absolute_os))]
bars1 = axes[0].barh(range(len(top_absolute_os)), top_absolute_os['Total_Crashes'], 
                     color=colors_abs, alpha=0.8, edgecolor='black')
axes[0].set_yticks(range(len(top_absolute_os)))
axes[0].set_yticklabels(top_absolute_os['OS_Version'], fontsize=10)
axes[0].set_xlabel('Total Crashes', fontsize=12, fontweight='bold')
axes[0].set_title('Top 10 OS: Highest Absolute Crashes', fontsize=13, fontweight='bold')
axes[0].invert_yaxis()
axes[0].grid(True, alpha=0.3, axis='x')

for i, v in enumerate(top_absolute_os['Total_Crashes']):
    axes[0].text(v + (v*0.02), i, f'{v:,.0f}', va='center', fontsize=9, fontweight='bold')

# Relative crash rate
colors_rel = ['#e74c3c' if i < 3 else '#f39c12' for i in range(len(top_relative_os))]
bars2 = axes[1].barh(range(len(top_relative_os)), top_relative_os['Crash_Rate'], 
                     color=colors_rel, alpha=0.8, edgecolor='black')
axes[1].set_yticks(range(len(top_relative_os)))
axes[1].set_yticklabels(top_relative_os['OS_Version'], fontsize=10)
axes[1].set_xlabel('Crash Rate (%)', fontsize=12, fontweight='bold')
axes[1].set_title('Top 10 OS: Highest Relative Crash Rate', fontsize=13, fontweight='bold')
axes[1].invert_yaxis()
axes[1].grid(True, alpha=0.3, axis='x')

for i, v in enumerate(top_relative_os['Crash_Rate']):
    axes[1].text(v + (v*0.02), i, f'{v:.1f}%', va='center', fontsize=9, fontweight='bold')

plt.tight_layout()
plt.savefig('batch3_top_offending_os.png', dpi=300, bbox_inches='tight')
plt.show()
print("✓ Chart saved: batch3_top_offending_os.png")

# PLOT 2: Top Offending Devices (if available)
if 'Device' in df.columns:
    device_detailed = df.groupby('Device').agg({
        'Daily Crashes': ['sum', 'mean', 'count'],
        'Daily ANRs': ['sum', 'mean']
    }).reset_index()
    device_detailed.columns = ['Device', 'Total_Crashes', 'Avg_Crashes', 'Record_Count', 
                               'Total_ANRs', 'Avg_ANRs']
    device_detailed['Crash_Rate'] = (device_detailed['Total_Crashes'] / device_detailed['Record_Count']) * 100
    
    top_absolute_device = device_detailed.nlargest(15, 'Total_Crashes')
    top_relative_device = device_detailed[device_detailed['Record_Count'] >= 10].nlargest(15, 'Crash_Rate')
    
    fig, axes = plt.subplots(1, 2, figsize=(16, 10))
    
    # Absolute crashes
    colors_abs = ['#e74c3c' if i < 5 else '#3498db' for i in range(len(top_absolute_device))]
    bars1 = axes[0].barh(range(len(top_absolute_device)), top_absolute_device['Total_Crashes'], 
                         color=colors_abs, alpha=0.8, edgecolor='black')
    axes[0].set_yticks(range(len(top_absolute_device)))
    axes[0].set_yticklabels(top_absolute_device['Device'], fontsize=9)
    axes[0].set_xlabel('Total Crashes', fontsize=12, fontweight='bold')
    axes[0].set_title('Top 15 Devices: Highest Absolute Crashes', fontsize=13, fontweight='bold')
    axes[0].invert_yaxis()
    axes[0].grid(True, alpha=0.3, axis='x')
    
    for i, v in enumerate(top_absolute_device['Total_Crashes']):
        axes[0].text(v + (v*0.02), i, f'{v:,.0f}', va='center', fontsize=8, fontweight='bold')
    
    # Relative crash rate
    colors_rel = ['#e74c3c' if i < 5 else '#f39c12' for i in range(len(top_relative_device))]
    bars2 = axes[1].barh(range(len(top_relative_device)), top_relative_device['Crash_Rate'], 
                         color=colors_rel, alpha=0.8, edgecolor='black')
    axes[1].set_yticks(range(len(top_relative_device)))
    axes[1].set_yticklabels(top_relative_device['Device'], fontsize=9)
    axes[1].set_xlabel('Crash Rate (%)', fontsize=12, fontweight='bold')
    axes[1].set_title('Top 15 Devices: Highest Relative Crash Rate', fontsize=13, fontweight='bold')
    axes[1].invert_yaxis()
    axes[1].grid(True, alpha=0.3, axis='x')
    
    for i, v in enumerate(top_relative_device['Crash_Rate']):
        axes[1].text(v + (v*0.02), i, f'{v:.1f}%', va='center', fontsize=8, fontweight='bold')
    
    plt.tight_layout()
    plt.savefig('batch3_top_offending_devices.png', dpi=300, bbox_inches='tight')
    plt.show()
    print("✓ Chart saved: batch3_top_offending_devices.png")
No description has been provided for this image
✓ Chart saved: batch3_top_offending_os.png
No description has been provided for this image
✓ Chart saved: batch3_top_offending_devices.png

Data-forensic analyses (how to use Crashes, ANRs, Device type, OS version)

In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
import warnings
warnings.filterwarnings('ignore')

# Load the data
df = pd.read_csv(r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\Combined_App_Performance.csv")

# Data preprocessing
df.columns = df.columns.str.strip()
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
df['Daily Crashes'] = pd.to_numeric(df['Daily Crashes'], errors='coerce')
df['Daily ANRs'] = pd.to_numeric(df['Daily ANRs'], errors='coerce')
df = df.dropna(subset=['Date', 'Daily Crashes', 'Daily ANRs'])

print("="*70)
print("B. ANOMALY DETECTION & STATISTICAL ANALYSIS")
print("="*70)

# 1. GROUP COMPARISON: Crash and ANR rates by Device Type and OS Version
print("\n1. GROUP COMPARISON ANALYSIS")
print("-" * 70)

device_col = 'Device Type' if 'Device Type' in df.columns else 'Device Model'

if device_col in df.columns:
    # By Device Type
    device_comparison = df.groupby(device_col).agg({
        'Daily Crashes': ['sum', 'mean', 'std'],
        'Daily ANRs': ['sum', 'mean', 'std'],
        device_col: 'count'
    }).reset_index()
    
    device_comparison.columns = [device_col, 'Total_Crashes', 'Avg_Crashes', 'Std_Crashes',
                                  'Total_ANRs', 'Avg_ANRs', 'Std_ANRs', 'Sample_Size']
    
    device_comparison['Crash_Rate'] = (device_comparison['Total_Crashes'] / 
                                        device_comparison['Sample_Size']) * 100
    device_comparison['ANR_Rate'] = (device_comparison['Total_ANRs'] / 
                                      device_comparison['Sample_Size']) * 100
    
    print("\n📊 Device Type Comparison:")
    print(device_comparison[[device_col, 'Sample_Size', 'Avg_Crashes', 'Avg_ANRs', 
                             'Crash_Rate', 'ANR_Rate']].to_string(index=False))

# By OS Version
os_comparison = df.groupby('Android OS Version').agg({
    'Daily Crashes': ['sum', 'mean', 'std'],
    'Daily ANRs': ['sum', 'mean', 'std'],
    'Android OS Version': 'count'
}).reset_index()

os_comparison.columns = ['OS_Version', 'Total_Crashes', 'Avg_Crashes', 'Std_Crashes',
                         'Total_ANRs', 'Avg_ANRs', 'Std_ANRs', 'Sample_Size']

os_comparison['Crash_Rate'] = (os_comparison['Total_Crashes'] / 
                                os_comparison['Sample_Size']) * 100
os_comparison['ANR_Rate'] = (os_comparison['Total_ANRs'] / 
                              os_comparison['Sample_Size']) * 100

print("\n📊 OS Version Comparison:")
print(os_comparison[['OS_Version', 'Sample_Size', 'Avg_Crashes', 'Avg_ANRs', 
                     'Crash_Rate', 'ANR_Rate']].to_string(index=False))

# Visualization - Group Comparison
fig, axes = plt.subplots(2, 2, figsize=(16, 10))

# Device Type - Crashes
if device_col in df.columns:
    axes[0, 0].bar(device_comparison[device_col], device_comparison['Avg_Crashes'], 
                   color='#e74c3c', alpha=0.7)
    axes[0, 0].set_title('Average Crashes by Device Type', fontsize=12, fontweight='bold')
    axes[0, 0].set_ylabel('Avg Crashes')
    axes[0, 0].tick_params(axis='x', rotation=45)
    axes[0, 0].grid(True, alpha=0.3, axis='y')

    # Device Type - ANRs
    axes[0, 1].bar(device_comparison[device_col], device_comparison['Avg_ANRs'], 
                   color='#f39c12', alpha=0.7)
    axes[0, 1].set_title('Average ANRs by Device Type', fontsize=12, fontweight='bold')
    axes[0, 1].set_ylabel('Avg ANRs')
    axes[0, 1].tick_params(axis='x', rotation=45)
    axes[0, 1].grid(True, alpha=0.3, axis='y')

# OS Version - Crashes
axes[1, 0].bar(os_comparison['OS_Version'], os_comparison['Avg_Crashes'], 
               color='#3498db', alpha=0.7)
axes[1, 0].set_title('Average Crashes by OS Version', fontsize=12, fontweight='bold')
axes[1, 0].set_ylabel('Avg Crashes')
axes[1, 0].tick_params(axis='x', rotation=45)
axes[1, 0].grid(True, alpha=0.3, axis='y')

# OS Version - ANRs
axes[1, 1].bar(os_comparison['OS_Version'], os_comparison['Avg_ANRs'], 
               color='#9b59b6', alpha=0.7)
axes[1, 1].set_title('Average ANRs by OS Version', fontsize=12, fontweight='bold')
axes[1, 1].set_ylabel('Avg ANRs')
axes[1, 1].tick_params(axis='x', rotation=45)
axes[1, 1].grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.savefig('group_comparison.png', dpi=300, bbox_inches='tight')
print("\n✓ Chart saved: group_comparison.png")

# 2. CHI-SQUARE TEST: Association between Crash occurrence and Device Type
print("\n\n2. CHI-SQUARE TEST (ASSOCIATION ANALYSIS)")
print("-" * 70)

# Create binary crash indicator
df['Has_Crash'] = (df['Daily Crashes'] > 0).astype(int)

if device_col in df.columns:
    # Contingency table
    contingency_table = pd.crosstab(df[device_col], df['Has_Crash'], 
                                     margins=True, margins_name='Total')
    
    print("\n📋 Contingency Table: Crash Occurrence vs Device Type")
    print(contingency_table)
    
    # Chi-square test
    contingency_for_test = pd.crosstab(df[device_col], df['Has_Crash'])
    chi2, p_value, dof, expected = stats.chi2_contingency(contingency_for_test)
    
    print("\n🔬 Chi-Square Test Results:")
    print(f"   Chi-Square Statistic: {chi2:.4f}")
    print(f"   P-value: {p_value:.6f}")
    print(f"   Degrees of Freedom: {dof}")
    
    alpha = 0.05
    if p_value < alpha:
        print(f"\n✓ Result: SIGNIFICANT association detected (p < {alpha})")
        print(f"   There IS a statistically significant relationship between")
        print(f"   Device Type and Crash occurrence.")
    else:
        print(f"\n✗ Result: NO significant association (p >= {alpha})")
        print(f"   Device Type and Crash occurrence are independent.")
    
    # Effect size (Cramér's V)
    n = contingency_for_test.sum().sum()
    cramers_v = np.sqrt(chi2 / (n * (min(contingency_for_test.shape) - 1)))
    print(f"   Cramér's V (effect size): {cramers_v:.4f}")
    
    if cramers_v < 0.1:
        effect = "negligible"
    elif cramers_v < 0.3:
        effect = "small"
    elif cramers_v < 0.5:
        effect = "medium"
    else:
        effect = "large"
    print(f"   Effect size interpretation: {effect}")

# 3. LOGISTIC REGRESSION: Predict crash probability
print("\n\n3. LOGISTIC REGRESSION ANALYSIS")
print("-" * 70)

# Prepare features
df['Hour'] = df['Date'].dt.hour
df['DayOfWeek'] = df['Date'].dt.dayofweek
df['Month'] = df['Date'].dt.month

# Encode categorical variables
le_device = LabelEncoder()
le_os = LabelEncoder()

if device_col in df.columns:
    df['Device_Encoded'] = le_device.fit_transform(df[device_col].astype(str))
df['OS_Encoded'] = le_os.fit_transform(df['Android OS Version'].astype(str))

# Select features
feature_cols = ['OS_Encoded', 'Hour', 'Month', 'DayOfWeek']
if device_col in df.columns:
    feature_cols.insert(0, 'Device_Encoded')

# Handle missing values
df_model = df[feature_cols + ['Has_Crash']].dropna()

X = df_model[feature_cols]
y = df_model['Has_Crash']

# Train logistic regression
log_reg = LogisticRegression(max_iter=1000, random_state=42)
log_reg.fit(X, y)

# Results
print("\n📈 Logistic Regression Model Performance:")
print(f"   Model Accuracy: {log_reg.score(X, y):.4f}")
print(f"   Baseline (all no-crash): {1 - y.mean():.4f}")

# Coefficients and odds ratios
coef_df = pd.DataFrame({
    'Feature': feature_cols,
    'Coefficient': log_reg.coef_[0],
    'Odds_Ratio': np.exp(log_reg.coef_[0])
})
coef_df = coef_df.sort_values('Coefficient', ascending=False)

print("\n📊 Feature Coefficients & Odds Ratios:")
print(coef_df.to_string(index=False))

print("\n🔍 Interpretation:")
for idx, row in coef_df.iterrows():
    if row['Coefficient'] > 0.1:
        pct_increase = (row['Odds_Ratio'] - 1) * 100
        print(f"   • {row['Feature']}: INCREASES crash odds by {pct_increase:.1f}%")
    elif row['Coefficient'] < -0.1:
        pct_decrease = (1 - row['Odds_Ratio']) * 100
        print(f"   • {row['Feature']}: DECREASES crash odds by {pct_decrease:.1f}%")

# Visualization - Odds Ratios
fig, ax = plt.subplots(figsize=(10, 6))
colors = ['#e74c3c' if x > 1 else '#27ae60' for x in coef_df['Odds_Ratio']]
bars = ax.barh(coef_df['Feature'], coef_df['Odds_Ratio'], color=colors, alpha=0.7)
ax.axvline(x=1, color='black', linestyle='--', linewidth=2, label='No Effect')
ax.set_xlabel('Odds Ratio', fontsize=12, fontweight='bold')
ax.set_title('Logistic Regression: Feature Impact on Crash Probability', 
             fontsize=13, fontweight='bold')
ax.set_xlim(0.5, max(coef_df['Odds_Ratio'].max() * 1.1, 1.5))
ax.grid(True, alpha=0.3, axis='x')
ax.legend()

plt.tight_layout()
plt.savefig('logistic_regression_odds.png', dpi=300, bbox_inches='tight')
print("\n✓ Chart saved: logistic_regression_odds.png")

# 4. ANOMALY DETECTION (Z-Score Method)
print("\n\n4. ANOMALY DETECTION")
print("-" * 70)

# Calculate z-scores for crashes
df['Crash_ZScore'] = np.abs(stats.zscore(df['Daily Crashes']))
df['ANR_ZScore'] = np.abs(stats.zscore(df['Daily ANRs']))

# Identify anomalies (z-score > 3)
crash_anomalies = df[df['Crash_ZScore'] > 3].sort_values('Daily Crashes', ascending=False)
anr_anomalies = df[df['ANR_ZScore'] > 3].sort_values('Daily ANRs', ascending=False)

print(f"\n⚠ Crash Anomalies Detected: {len(crash_anomalies)}")
if len(crash_anomalies) > 0:
    print("\nTop 5 Crash Anomalies:")
    cols_to_show = ['Date', 'Daily Crashes', device_col if device_col in df.columns else 'Android OS Version', 
                    'Android OS Version', 'Crash_ZScore']
    cols_to_show = [c for c in cols_to_show if c in crash_anomalies.columns]
    print(crash_anomalies[cols_to_show].head(5).to_string(index=False))

print(f"\n⚠ ANR Anomalies Detected: {len(anr_anomalies)}")
if len(anr_anomalies) > 0:
    print("\nTop 5 ANR Anomalies:")
    cols_to_show = ['Date', 'Daily ANRs', device_col if device_col in df.columns else 'Android OS Version', 
                    'Android OS Version', 'ANR_ZScore']
    cols_to_show = [c for c in cols_to_show if c in anr_anomalies.columns]
    print(anr_anomalies[cols_to_show].head(5).to_string(index=False))

# Visualization - Anomalies
fig, axes = plt.subplots(2, 1, figsize=(14, 8))

# Crash anomalies
axes[0].scatter(df['Date'], df['Daily Crashes'], alpha=0.5, s=30, c='#3498db', label='Normal')
if len(crash_anomalies) > 0:
    axes[0].scatter(crash_anomalies['Date'], crash_anomalies['Daily Crashes'], 
                    color='#e74c3c', s=100, marker='X', label='Anomaly', zorder=5)
axes[0].set_title('Crash Anomaly Detection (Z-Score > 3)', fontsize=13, fontweight='bold')
axes[0].set_ylabel('Daily Crashes')
axes[0].legend()
axes[0].grid(True, alpha=0.3)

# ANR anomalies
axes[1].scatter(df['Date'], df['Daily ANRs'], alpha=0.5, s=30, c='#9b59b6', label='Normal')
if len(anr_anomalies) > 0:
    axes[1].scatter(anr_anomalies['Date'], anr_anomalies['Daily ANRs'], 
                    color='#e74c3c', s=100, marker='X', label='Anomaly', zorder=5)
axes[1].set_title('ANR Anomaly Detection (Z-Score > 3)', fontsize=13, fontweight='bold')
axes[1].set_xlabel('Date')
axes[1].set_ylabel('Daily ANRs')
axes[1].legend()
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('anomaly_detection.png', dpi=300, bbox_inches='tight')
print("\n✓ Chart saved: anomaly_detection.png")

print("\n" + "="*70)
print("ANOMALY DETECTION & STATISTICAL ANALYSIS COMPLETE")
print("="*70)
======================================================================
B. ANOMALY DETECTION & STATISTICAL ANALYSIS
======================================================================

1. GROUP COMPARISON ANALYSIS
----------------------------------------------------------------------

📊 OS Version Comparison:
 OS_Version  Sample_Size  Avg_Crashes   Avg_ANRs  Crash_Rate     ANR_Rate
 Android 10           28     6.964286  11.785714  696.428571  1178.571429
 Android 11           28     8.500000  13.392857  850.000000  1339.285714
 Android 12           28    10.357143  20.071429 1035.714286  2007.142857
Android 12L            2     0.000000   1.000000    0.000000   100.000000
 Android 13           28    13.607143  11.607143 1360.714286  1160.714286
 Android 14           28    38.428571 149.142857 3842.857143 14914.285714
 Android 15           28     1.571429   6.428571  157.142857   642.857143
Android 6.0           21     0.761905   2.285714   76.190476   228.571429
Android 7.0           17     0.352941   2.058824   35.294118   205.882353
Android 7.1           23     4.043478   2.782609  404.347826   278.260870
Android 8.0           22     2.227273   0.954545  222.727273    95.454545
Android 8.1           28     4.821429   8.678571  482.142857   867.857143
  Android 9           28    24.250000   6.500000 2425.000000   650.000000

✓ Chart saved: group_comparison.png


2. CHI-SQUARE TEST (ASSOCIATION ANALYSIS)
----------------------------------------------------------------------


3. LOGISTIC REGRESSION ANALYSIS
----------------------------------------------------------------------

📈 Logistic Regression Model Performance:
   Model Accuracy: 0.6609
   Baseline (all no-crash): 0.3391

📊 Feature Coefficients & Odds Ratios:
   Feature  Coefficient  Odds_Ratio
 DayOfWeek     0.045513    1.046564
      Hour     0.000000    1.000000
     Month    -0.084126    0.919316
OS_Encoded    -0.130480    0.877674

🔍 Interpretation:
   • OS_Encoded: DECREASES crash odds by 12.2%

✓ Chart saved: logistic_regression_odds.png


4. ANOMALY DETECTION
----------------------------------------------------------------------

⚠ Crash Anomalies Detected: 66

Top 5 Crash Anomalies:
      Date  Daily Crashes Android OS Version Android OS Version  Crash_ZScore
2025-02-21           77.0         Android 14         Android 14     14.895559
2025-02-22           76.0         Android 14         Android 14     14.696837
2025-02-23           72.0         Android 14         Android 14     13.901947
2025-02-24           62.0         Android 14         Android 14     11.914724
2025-02-19           54.0         Android 14         Android 14     10.324946

⚠ ANR Anomalies Detected: 28

Top 5 ANR Anomalies:
      Date  Daily ANRs Android OS Version Android OS Version  ANR_ZScore
2025-02-24       185.0         Android 14         Android 14   13.194475
2025-02-22       181.0         Android 14         Android 14   12.904970
2025-02-13       171.0         Android 14         Android 14   12.181208
2025-02-25       165.0         Android 14         Android 14   11.746951
2025-02-23       160.0         Android 14         Android 14   11.385070

✓ Chart saved: anomaly_detection.png

======================================================================
ANOMALY DETECTION & STATISTICAL ANALYSIS COMPLETE
======================================================================
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]:
 

NDPR Compliance:DPIA and Breach Evidence Analysis

In [3]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import warnings
warnings.filterwarnings('ignore')

# Set style
sns.set_style("whitegrid")
plt.rcParams['figure.dpi'] = 300

print("="*80)
print("NDPR/PRIVACY COMPLIANCE ANALYSIS - BATCH 3")
print("DPIA (DATA PROTECTION IMPACT ASSESSMENT) & BREACH EVIDENCE")
print("="*80)

# Load the data
df = pd.read_csv(r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\Combined_App_Performance.csv")
df.columns = df.columns.str.strip()

# Find columns
date_col = [col for col in df.columns if 'date' in col.lower()][0]
device_col = [col for col in df.columns if 'device' in col.lower()][0] if any('device' in col.lower() for col in df.columns) else None

# Keep only required columns
columns_to_keep = ['Daily ANRs', 'Android OS Version', 'Daily Crashes', date_col]
if device_col:
    columns_to_keep.append(device_col)

df = df[columns_to_keep].copy()

# Clean data
df[date_col] = pd.to_datetime(df[date_col], errors='coerce')
df = df.rename(columns={date_col: 'Date'})
df['Daily Crashes'] = pd.to_numeric(df['Daily Crashes'], errors='coerce')
df['Daily ANRs'] = pd.to_numeric(df['Daily ANRs'], errors='coerce')
df = df.dropna(subset=['Date', 'Daily Crashes', 'Daily ANRs'])

if device_col:
    df = df.rename(columns={device_col: 'Device'})

print(f"\nDataset loaded: {len(df):,} records")
print(f"Date range: {df['Date'].min()} to {df['Date'].max()}")

# ===========================================================================
# 1. DATA PROTECTION IMPACT ASSESSMENT (DPIA)
# ===========================================================================

print("\n" + "="*80)
print("1. DATA PROTECTION IMPACT ASSESSMENT (DPIA)")
print("="*80)

print(f"\n📋 DPIA FRAMEWORK - NDPR Article 25 Compliance")
print(f"\n   A DPIA is REQUIRED when processing:")
print(f"   • Involves systematic monitoring")
print(f"   • Processes personal data at large scale")
print(f"   • Uses automated decision-making")
print(f"   • Processes sensitive or criminal data")

# Risk assessment based on data characteristics
risk_score = 0
risk_factors = []

print(f"\n🔍 RISK ASSESSMENT:")

# Factor 1: Volume of data
total_records = len(df)
if total_records > 10000:
    risk_score += 3
    risk_factors.append("High volume of records (>10,000)")
    print(f"   ⚠️  HIGH VOLUME: {total_records:,} records (Risk +3)")
elif total_records > 1000:
    risk_score += 2
    risk_factors.append("Medium volume of records (>1,000)")
    print(f"   ⚠️  MEDIUM VOLUME: {total_records:,} records (Risk +2)")
else:
    risk_score += 1
    print(f"   ✓ LOW VOLUME: {total_records:,} records (Risk +1)")

# Factor 2: Data retention period
data_age_max = (datetime.now() - df['Date'].min()).days
if data_age_max > 365:
    risk_score += 3
    risk_factors.append(f"Extended retention period ({data_age_max} days)")
    print(f"   ⚠️  EXTENDED RETENTION: {data_age_max} days (Risk +3)")
elif data_age_max > 180:
    risk_score += 2
    print(f"   ⚠️  MODERATE RETENTION: {data_age_max} days (Risk +2)")
else:
    risk_score += 1
    print(f"   ✓ COMPLIANT RETENTION: {data_age_max} days (Risk +1)")

# Factor 3: Device identification
if 'Device' in df.columns:
    unique_devices = df['Device'].nunique()
    if unique_devices > 1000:
        risk_score += 2
        risk_factors.append(f"Large user base ({unique_devices} devices)")
        print(f"   ⚠️  LARGE USER BASE: {unique_devices:,} devices (Risk +2)")
    else:
        risk_score += 1
        print(f"   ✓ MODERATE USER BASE: {unique_devices:,} devices (Risk +1)")

# Factor 4: OS version diversity
unique_os = df['Android OS Version'].nunique()
if unique_os > 10:
    risk_score += 1
    print(f"   ⚠️  HIGH OS FRAGMENTATION: {unique_os} versions (Risk +1)")
else:
    print(f"   ✓ LOW OS FRAGMENTATION: {unique_os} versions")

# Factor 5: Presence of crash data
total_crashes = df['Daily Crashes'].sum()
total_anrs = df['Daily ANRs'].sum()
if total_crashes > 0 or total_anrs > 0:
    risk_score += 1
    risk_factors.append(f"Contains performance incidents ({total_crashes} crashes, {total_anrs} ANRs)")
    print(f"   ⚠️  INCIDENT DATA: {total_crashes:,} crashes, {total_anrs:,} ANRs (Risk +1)")

# Calculate overall risk level
max_possible_risk = 11  # Maximum points from all factors
risk_percentage = (risk_score / max_possible_risk) * 100

print(f"\n📊 OVERALL DPIA RISK ASSESSMENT:")
print(f"   Total Risk Score: {risk_score}/{max_possible_risk} ({risk_percentage:.1f}%)")

if risk_percentage >= 70:
    risk_level = "HIGH RISK"
    risk_color = "🔴"
elif risk_percentage >= 40:
    risk_level = "MEDIUM RISK"
    risk_color = "🟡"
else:
    risk_level = "LOW RISK"
    risk_color = "🟢"

print(f"   Risk Level: {risk_color} {risk_level}")

if risk_level == "HIGH RISK":
    print(f"\n   🔴 ACTION REQUIRED:")
    print(f"      • Complete formal DPIA documentation")
    print(f"      • Consult with Data Protection Officer (DPO)")
    print(f"      • Implement additional safeguards")
    print(f"      • Consider privacy-by-design improvements")
elif risk_level == "MEDIUM RISK":
    print(f"\n   🟡 RECOMMENDATION:")
    print(f"      • Document risk assessment findings")
    print(f"      • Review data retention policies")
    print(f"      • Enhance security measures")

print(f"\n📋 KEY RISK FACTORS IDENTIFIED:")
for i, factor in enumerate(risk_factors, 1):
    print(f"   {i}. {factor}")

# ===========================================================================
# 2. DPIA COMPLIANCE CHECKLIST
# ===========================================================================

print("\n\n" + "="*80)
print("2. DPIA COMPLIANCE CHECKLIST")
print("="*80)

dpia_checklist = [
    ("Data Processing Necessity", "Is crash/ANR monitoring necessary for app functionality?"),
    ("Legitimate Interest", "Does monitoring serve legitimate business interest?"),
    ("User Consent", "Have users been informed and given consent?"),
    ("Purpose Limitation", "Is data used only for stated purpose?"),
    ("Data Minimization", "Are we collecting only necessary data?"),
    ("Storage Limitation", "Is retention period justified and documented?"),
    ("Security Measures", "Are appropriate technical measures in place?"),
    ("Rights Management", "Can users exercise their data rights (access, deletion)?"),
    ("Processor Agreements", "Are third-party processors under contract?"),
    ("Breach Response", "Is incident response plan documented?")
]

print(f"\n✓ Complete the following DPIA requirements:\n")
for i, (requirement, question) in enumerate(dpia_checklist, 1):
    print(f"   [{' '}] {i}. {requirement}")
    print(f"       Q: {question}")
    print()

# ===========================================================================
# 3. BREACH EVIDENCE ANALYSIS
# ===========================================================================

print("\n" + "="*80)
print("3. BREACH EVIDENCE ANALYSIS")
print("="*80)

print(f"\n🔍 ANALYZING DATA ACCESS PATTERNS FOR SUSPICIOUS ACTIVITY...")

# Analyze daily record patterns
daily_records = df.groupby('Date').size().reset_index(name='Record_Count')
daily_records = daily_records.sort_values('Date')

# Statistical anomaly detection
mean_count = daily_records['Record_Count'].mean()
std_count = daily_records['Record_Count'].std()
daily_records['Z_Score'] = (daily_records['Record_Count'] - mean_count) / std_count
daily_records['Is_Anomaly'] = abs(daily_records['Z_Score']) > 2

anomalies = daily_records[daily_records['Is_Anomaly']]
anomaly_count = len(anomalies)

print(f"\n📊 ACCESS PATTERN ANALYSIS:")
print(f"   Mean daily records: {mean_count:.2f}")
print(f"   Standard deviation: {std_count:.2f}")
print(f"   Detection threshold: ±2 standard deviations")
print(f"   Total days analyzed: {len(daily_records)}")
print(f"   Anomalies detected: {anomaly_count}")

if anomaly_count > 0:
    print(f"\n⚠️  SUSPICIOUS ACTIVITY DETECTED!")
    print(f"\n   Days with unusual access patterns:")
    for idx, row in anomalies.iterrows():
        date_str = row['Date'].strftime('%Y-%m-%d')
        deviation = "HIGH" if row['Z_Score'] > 2 else "LOW"
        print(f"   • {date_str}: {row['Record_Count']} records (Z-score: {row['Z_Score']:.2f}, {deviation})")
    
    print(f"\n   🔴 INVESTIGATION ACTIONS REQUIRED:")
    print(f"      1. Review server access logs for these dates")
    print(f"      2. Check for unauthorized data exports")
    print(f"      3. Verify user authentication logs")
    print(f"      4. Confirm business justification for spikes")
    print(f"      5. Document findings in security incident log")
else:
    print(f"\n   ✅ NO ANOMALOUS ACCESS PATTERNS DETECTED")
    print(f"      Data access patterns appear normal and consistent")

# Analyze crash/ANR spikes
print(f"\n\n📊 INCIDENT SPIKE ANALYSIS:")

daily_crashes = df.groupby('Date')['Daily Crashes'].sum()
daily_anrs = df.groupby('Date')['Daily ANRs'].sum()

crash_mean = daily_crashes.mean()
crash_std = daily_crashes.std()
anr_mean = daily_anrs.mean()
anr_std = daily_anrs.std()

crash_spikes = daily_crashes[daily_crashes > (crash_mean + 2 * crash_std)]
anr_spikes = daily_anrs[daily_anrs > (anr_mean + 2 * anr_std)]

print(f"   Crash spikes detected: {len(crash_spikes)}")
print(f"   ANR spikes detected: {len(anr_spikes)}")

if len(crash_spikes) > 0:
    print(f"\n   ⚠️  CRASH SPIKES ON:")
    for date, value in crash_spikes.items():
        print(f"      • {date.strftime('%Y-%m-%d')}: {value:.0f} crashes ({((value/crash_mean)-1)*100:.1f}% above normal)")

if len(anr_spikes) > 0:
    print(f"\n   ⚠️  ANR SPIKES ON:")
    for date, value in anr_spikes.items():
        print(f"      • {date.strftime('%Y-%m-%d')}: {value:.0f} ANRs ({((value/anr_mean)-1)*100:.1f}% above normal)")

# ===========================================================================
# 4. BREACH NOTIFICATION REQUIREMENTS
# ===========================================================================

print("\n\n" + "="*80)
print("4. BREACH NOTIFICATION REQUIREMENTS (NDPR Article 33)")
print("="*80)

print(f"\n⚠️  IF A DATA BREACH IS CONFIRMED:")
print(f"\n   Timeline Requirements:")
print(f"   • Notify NITDA within 72 hours of awareness")
print(f"   • Document breach details and response actions")
print(f"   • Notify affected users if high risk to their rights")
print(f"\n   Breach Documentation Must Include:")
print(f"   1. Nature of the breach (unauthorized access, exfiltration, etc.)")
print(f"   2. Categories and approximate number of data subjects affected")
print(f"   3. Categories and approximate number of records affected")
print(f"   4. Likely consequences of the breach")
print(f"   5. Measures taken to address the breach")
print(f"   6. Measures to mitigate possible adverse effects")
print(f"\n   Contact Information:")
print(f"   • NITDA: info@nitda.gov.ng")
print(f"   • Website: https://nitda.gov.ng")

# ===========================================================================
# 5. INCIDENT RESPONSE CHECKLIST
# ===========================================================================

print("\n\n" + "="*80)
print("5. SECURITY INCIDENT RESPONSE CHECKLIST")
print("="*80)

incident_checklist = [
    "Isolate affected systems immediately",
    "Preserve evidence (logs, access records, timestamps)",
    "Notify Data Protection Officer (DPO)",
    "Assemble incident response team",
    "Conduct forensic analysis of breach scope",
    "Identify compromised data and affected users",
    "Assess risk to individuals' rights and freedoms",
    "Document all actions taken with timestamps",
    "Prepare breach notification for NITDA",
    "Notify affected users if required",
    "Implement remediation measures",
    "Conduct post-incident review",
    "Update security policies and procedures"
]

print(f"\n📋 IMMEDIATE ACTIONS IF BREACH SUSPECTED:\n")
for i, action in enumerate(incident_checklist, 1):
    print(f"   [{' '}] {i}. {action}")

# ===========================================================================
# 6. AUDIT TRAIL REQUIREMENTS
# ===========================================================================

print("\n\n" + "="*80)
print("6. AUDIT TRAIL & LOGGING REQUIREMENTS")
print("="*80)

print(f"\n📝 REQUIRED AUDIT LOGS (must be maintained):")
print(f"\n   Access Logs:")
print(f"   • User ID accessing the data")
print(f"   • Timestamp of access")
print(f"   • IP address of accessing device")
print(f"   • Type of operation (read, write, delete, export)")
print(f"   • Records accessed or modified")
print(f"\n   Security Event Logs:")
print(f"   • Failed authentication attempts")
print(f"   • Privilege escalation events")
print(f"   • Configuration changes")
print(f"   • Data export activities")
print(f"   • Unusual query patterns")
print(f"\n   Retention:")
print(f"   • Audit logs must be retained for minimum 1 year")
print(f"   • Logs must be tamper-proof and encrypted")
print(f"   • Regular review by security team required")

# Generate summary report
print("\n\n" + "="*80)
print("BATCH 3 SUMMARY REPORT")
print("="*80)

print(f"\n📊 DPIA RISK ASSESSMENT:")
print(f"   Overall Risk Level: {risk_level} ({risk_percentage:.1f}%)")
print(f"   Risk Factors: {len(risk_factors)}")

print(f"\n🔍 BREACH EVIDENCE:")
print(f"   Anomalous Days: {anomaly_count}")
print(f"   Crash Spikes: {len(crash_spikes)}")
print(f"   ANR Spikes: {len(anr_spikes)}")

if anomaly_count > 0 or len(crash_spikes) > 0 or len(anr_spikes) > 0:
    print(f"\n   ⚠️  STATUS: INVESTIGATION REQUIRED")
else:
    print(f"\n   ✅ STATUS: NO IMMEDIATE CONCERNS")

print(f"\n📋 COMPLIANCE STATUS:")
print(f"   DPIA Required: {'YES' if risk_level in ['HIGH RISK', 'MEDIUM RISK'] else 'RECOMMENDED'}")
print(f"   NITDA Registration: VERIFY MANUALLY")
print(f"   Data Protection Officer: VERIFY APPOINTED")

print("\n" + "="*80)
print("BATCH 3 COMPLETE - DPIA & BREACH EVIDENCE ANALYSIS")
print("="*80)
================================================================================
NDPR/PRIVACY COMPLIANCE ANALYSIS - BATCH 3
DPIA (DATA PROTECTION IMPACT ASSESSMENT) & BREACH EVIDENCE
================================================================================

Dataset loaded: 3,391 records
Date range: 2024-07-05 00:00:00 to 2025-05-11 00:00:00

================================================================================
1. DATA PROTECTION IMPACT ASSESSMENT (DPIA)
================================================================================

📋 DPIA FRAMEWORK - NDPR Article 25 Compliance

   A DPIA is REQUIRED when processing:
   • Involves systematic monitoring
   • Processes personal data at large scale
   • Uses automated decision-making
   • Processes sensitive or criminal data

🔍 RISK ASSESSMENT:
   ⚠️  MEDIUM VOLUME: 3,391 records (Risk +2)
   ⚠️  EXTENDED RETENTION: 470 days (Risk +3)
   ✓ MODERATE USER BASE: 782 devices (Risk +1)
   ⚠️  HIGH OS FRAGMENTATION: 13 versions (Risk +1)
   ⚠️  INCIDENT DATA: 6,929.0 crashes, 9,142.0 ANRs (Risk +1)

📊 OVERALL DPIA RISK ASSESSMENT:
   Total Risk Score: 8/11 (72.7%)
   Risk Level: 🔴 HIGH RISK

   🔴 ACTION REQUIRED:
      • Complete formal DPIA documentation
      • Consult with Data Protection Officer (DPO)
      • Implement additional safeguards
      • Consider privacy-by-design improvements

📋 KEY RISK FACTORS IDENTIFIED:
   1. Medium volume of records (>1,000)
   2. Extended retention period (470 days)
   3. Contains performance incidents (6929.0 crashes, 9142.0 ANRs)


================================================================================
2. DPIA COMPLIANCE CHECKLIST
================================================================================

✓ Complete the following DPIA requirements:

   [ ] 1. Data Processing Necessity
       Q: Is crash/ANR monitoring necessary for app functionality?

   [ ] 2. Legitimate Interest
       Q: Does monitoring serve legitimate business interest?

   [ ] 3. User Consent
       Q: Have users been informed and given consent?

   [ ] 4. Purpose Limitation
       Q: Is data used only for stated purpose?

   [ ] 5. Data Minimization
       Q: Are we collecting only necessary data?

   [ ] 6. Storage Limitation
       Q: Is retention period justified and documented?

   [ ] 7. Security Measures
       Q: Are appropriate technical measures in place?

   [ ] 8. Rights Management
       Q: Can users exercise their data rights (access, deletion)?

   [ ] 9. Processor Agreements
       Q: Are third-party processors under contract?

   [ ] 10. Breach Response
       Q: Is incident response plan documented?


================================================================================
3. BREACH EVIDENCE ANALYSIS
================================================================================

🔍 ANALYZING DATA ACCESS PATTERNS FOR SUSPICIOUS ACTIVITY...

📊 ACCESS PATTERN ANALYSIS:
   Mean daily records: 56.52
   Standard deviation: 76.69
   Detection threshold: ±2 standard deviations
   Total days analyzed: 60
   Anomalies detected: 3

⚠️  SUSPICIOUS ACTIVITY DETECTED!

   Days with unusual access patterns:
   • 2024-07-24: 211 records (Z-score: 2.01, HIGH)
   • 2024-07-25: 237 records (Z-score: 2.35, HIGH)
   • 2024-07-26: 214 records (Z-score: 2.05, HIGH)

   🔴 INVESTIGATION ACTIONS REQUIRED:
      1. Review server access logs for these dates
      2. Check for unauthorized data exports
      3. Verify user authentication logs
      4. Confirm business justification for spikes
      5. Document findings in security incident log


📊 INCIDENT SPIKE ANALYSIS:
   Crash spikes detected: 1
   ANR spikes detected: 1

   ⚠️  CRASH SPIKES ON:
      • 2024-07-25: 386 crashes (234.2% above normal)

   ⚠️  ANR SPIKES ON:
      • 2025-02-22: 368 ANRs (141.5% above normal)


================================================================================
4. BREACH NOTIFICATION REQUIREMENTS (NDPR Article 33)
================================================================================

⚠️  IF A DATA BREACH IS CONFIRMED:

   Timeline Requirements:
   • Notify NITDA within 72 hours of awareness
   • Document breach details and response actions
   • Notify affected users if high risk to their rights

   Breach Documentation Must Include:
   1. Nature of the breach (unauthorized access, exfiltration, etc.)
   2. Categories and approximate number of data subjects affected
   3. Categories and approximate number of records affected
   4. Likely consequences of the breach
   5. Measures taken to address the breach
   6. Measures to mitigate possible adverse effects

   Contact Information:
   • NITDA: info@nitda.gov.ng
   • Website: https://nitda.gov.ng


================================================================================
5. SECURITY INCIDENT RESPONSE CHECKLIST
================================================================================

📋 IMMEDIATE ACTIONS IF BREACH SUSPECTED:

   [ ] 1. Isolate affected systems immediately
   [ ] 2. Preserve evidence (logs, access records, timestamps)
   [ ] 3. Notify Data Protection Officer (DPO)
   [ ] 4. Assemble incident response team
   [ ] 5. Conduct forensic analysis of breach scope
   [ ] 6. Identify compromised data and affected users
   [ ] 7. Assess risk to individuals' rights and freedoms
   [ ] 8. Document all actions taken with timestamps
   [ ] 9. Prepare breach notification for NITDA
   [ ] 10. Notify affected users if required
   [ ] 11. Implement remediation measures
   [ ] 12. Conduct post-incident review
   [ ] 13. Update security policies and procedures


================================================================================
6. AUDIT TRAIL & LOGGING REQUIREMENTS
================================================================================

📝 REQUIRED AUDIT LOGS (must be maintained):

   Access Logs:
   • User ID accessing the data
   • Timestamp of access
   • IP address of accessing device
   • Type of operation (read, write, delete, export)
   • Records accessed or modified

   Security Event Logs:
   • Failed authentication attempts
   • Privilege escalation events
   • Configuration changes
   • Data export activities
   • Unusual query patterns

   Retention:
   • Audit logs must be retained for minimum 1 year
   • Logs must be tamper-proof and encrypted
   • Regular review by security team required


================================================================================
BATCH 3 SUMMARY REPORT
================================================================================

📊 DPIA RISK ASSESSMENT:
   Overall Risk Level: HIGH RISK (72.7%)
   Risk Factors: 3

🔍 BREACH EVIDENCE:
   Anomalous Days: 3
   Crash Spikes: 1
   ANR Spikes: 1

   ⚠️  STATUS: INVESTIGATION REQUIRED

📋 COMPLIANCE STATUS:
   DPIA Required: YES
   NITDA Registration: VERIFY MANUALLY
   Data Protection Officer: VERIFY APPOINTED

================================================================================
BATCH 3 COMPLETE - DPIA & BREACH EVIDENCE ANALYSIS
================================================================================

PLOT CHARTS: NDPR Compliance:DPIA and Breach Evidence Analysis

In [4]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import warnings
warnings.filterwarnings('ignore')

# Set style
sns.set_style("whitegrid")
plt.rcParams['figure.dpi'] = 300

# Load and prepare data
df = pd.read_csv(r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\Combined_App_Performance.csv")
df.columns = df.columns.str.strip()

# Find columns
date_col = [col for col in df.columns if 'date' in col.lower()][0]
device_col = [col for col in df.columns if 'device' in col.lower()][0] if any('device' in col.lower() for col in df.columns) else None

# Keep required columns
columns_to_keep = ['Daily ANRs', 'Android OS Version', 'Daily Crashes', date_col]
if device_col:
    columns_to_keep.append(device_col)
df = df[columns_to_keep].copy()

# Clean data
df[date_col] = pd.to_datetime(df[date_col], errors='coerce')
df = df.rename(columns={date_col: 'Date'})
df['Daily Crashes'] = pd.to_numeric(df['Daily Crashes'], errors='coerce')
df['Daily ANRs'] = pd.to_numeric(df['Daily ANRs'], errors='coerce')
df = df.dropna(subset=['Date', 'Daily Crashes', 'Daily ANRs'])
if device_col:
    df = df.rename(columns={device_col: 'Device'})

# ===========================================================================
# CHART 1: DPIA RISK ASSESSMENT VISUALIZATION
# ===========================================================================

# Calculate risk factors
risk_factors = []
risk_scores = []
risk_levels = []

# Volume Risk
total_records = len(df)
if total_records > 10000:
    risk_factors.append('Data Volume')
    risk_scores.append(3)
    risk_levels.append('HIGH')
elif total_records > 1000:
    risk_factors.append('Data Volume')
    risk_scores.append(2)
    risk_levels.append('MEDIUM')
else:
    risk_factors.append('Data Volume')
    risk_scores.append(1)
    risk_levels.append('LOW')

# Retention Risk
data_age = (datetime.now() - df['Date'].min()).days
if data_age > 365:
    risk_factors.append('Retention Period')
    risk_scores.append(3)
    risk_levels.append('HIGH')
elif data_age > 180:
    risk_factors.append('Retention Period')
    risk_scores.append(2)
    risk_levels.append('MEDIUM')
else:
    risk_factors.append('Retention Period')
    risk_scores.append(1)
    risk_levels.append('LOW')

# Device Identification Risk
if 'Device' in df.columns:
    unique_devices = df['Device'].nunique()
    if unique_devices > 1000:
        risk_factors.append('User Base Size')
        risk_scores.append(2)
        risk_levels.append('MEDIUM')
    else:
        risk_factors.append('User Base Size')
        risk_scores.append(1)
        risk_levels.append('LOW')

# OS Version Diversity Risk
unique_os = df['Android OS Version'].nunique()
if unique_os > 10:
    risk_factors.append('OS Fragmentation')
    risk_scores.append(2)
    risk_levels.append('MEDIUM')
else:
    risk_factors.append('OS Fragmentation')
    risk_scores.append(1)
    risk_levels.append('LOW')

# Data Sensitivity Risk (based on crash/ANR presence)
has_crashes = (df['Daily Crashes'] > 0).any()
has_anrs = (df['Daily ANRs'] > 0).any()
if has_crashes and has_anrs:
    risk_factors.append('Data Sensitivity')
    risk_scores.append(2)
    risk_levels.append('MEDIUM')
else:
    risk_factors.append('Data Sensitivity')
    risk_scores.append(1)
    risk_levels.append('LOW')

# Create DPIA Risk Assessment Chart
fig, axes = plt.subplots(2, 2, figsize=(16, 10))

# Chart 1: Risk Score by Factor
colors = ['#e74c3c' if s == 3 else '#f39c12' if s == 2 else '#27ae60' for s in risk_scores]
bars = axes[0, 0].barh(risk_factors, risk_scores, color=colors, alpha=0.8, edgecolor='black')
axes[0, 0].set_xlabel('Risk Score', fontsize=12, fontweight='bold')
axes[0, 0].set_title('DPIA: Risk Assessment by Factor', fontsize=13, fontweight='bold')
axes[0, 0].set_xlim(0, 3.5)
axes[0, 0].grid(True, alpha=0.3, axis='x')

# Add value labels and risk levels
for i, (score, level) in enumerate(zip(risk_scores, risk_levels)):
    axes[0, 0].text(score + 0.1, i, f'{score} - {level}', va='center', 
                    fontsize=10, fontweight='bold')

# Add legend for risk levels
axes[0, 0].axvline(x=1, color='#27ae60', linestyle='--', alpha=0.5, linewidth=2)
axes[0, 0].axvline(x=2, color='#f39c12', linestyle='--', alpha=0.5, linewidth=2)
axes[0, 0].axvline(x=3, color='#e74c3c', linestyle='--', alpha=0.5, linewidth=2)

# Chart 2: Overall Risk Gauge
total_risk = sum(risk_scores)
max_possible_risk = len(risk_factors) * 3
risk_percentage = (total_risk / max_possible_risk) * 100

if risk_percentage >= 70:
    gauge_color = '#e74c3c'
    risk_category = 'HIGH RISK'
elif risk_percentage >= 40:
    gauge_color = '#f39c12'
    risk_category = 'MEDIUM RISK'
else:
    gauge_color = '#27ae60'
    risk_category = 'LOW RISK'

axes[0, 1].barh(['Overall DPIA Risk'], [risk_percentage], color=gauge_color, 
                alpha=0.8, edgecolor='black', height=0.5)
axes[0, 1].set_xlabel('Risk Percentage (%)', fontsize=12, fontweight='bold')
axes[0, 1].set_title(f'Overall DPIA Risk Level: {risk_category}', 
                     fontsize=13, fontweight='bold')
axes[0, 1].set_xlim(0, 100)
axes[0, 1].text(risk_percentage + 2, 0, f'{risk_percentage:.1f}%', 
                va='center', fontsize=14, fontweight='bold')
axes[0, 1].axvline(x=40, color='#f39c12', linestyle='--', alpha=0.5, linewidth=2)
axes[0, 1].axvline(x=70, color='#e74c3c', linestyle='--', alpha=0.5, linewidth=2)
axes[0, 1].grid(True, alpha=0.3, axis='x')

# ===========================================================================
# CHART 3: BREACH EVIDENCE - UNUSUAL ACTIVITY DETECTION
# ===========================================================================

# Analyze daily record patterns for anomalies
daily_records = df.groupby('Date').size().reset_index(name='Record_Count')
daily_records = daily_records.sort_values('Date')

# Calculate z-scores for anomaly detection
mean_count = daily_records['Record_Count'].mean()
std_count = daily_records['Record_Count'].std()
daily_records['Z_Score'] = (daily_records['Record_Count'] - mean_count) / std_count
daily_records['Is_Anomaly'] = abs(daily_records['Z_Score']) > 2

# Chart 3: Daily Activity Pattern with Anomalies
normal_data = daily_records[~daily_records['Is_Anomaly']]
anomaly_data = daily_records[daily_records['Is_Anomaly']]

axes[1, 0].plot(normal_data['Date'], normal_data['Record_Count'], 
                marker='o', linewidth=2, markersize=4, color='#3498db', 
                label='Normal Activity')

if len(anomaly_data) > 0:
    axes[1, 0].scatter(anomaly_data['Date'], anomaly_data['Record_Count'], 
                       color='#e74c3c', s=200, marker='X', 
                       label='Suspicious Activity', zorder=5, edgecolor='black', linewidth=2)

# Add threshold lines
upper_threshold = mean_count + 2 * std_count
lower_threshold = max(0, mean_count - 2 * std_count)
axes[1, 0].axhline(y=upper_threshold, color='red', linestyle='--', 
                   linewidth=2, alpha=0.6, label=f'Upper Threshold: {upper_threshold:.0f}')
axes[1, 0].axhline(y=mean_count, color='green', linestyle='-', 
                   linewidth=2, alpha=0.6, label=f'Mean: {mean_count:.0f}')

axes[1, 0].set_xlabel('Date', fontsize=12, fontweight='bold')
axes[1, 0].set_ylabel('Daily Record Count', fontsize=12, fontweight='bold')
axes[1, 0].set_title('Breach Detection: Unusual Data Access Patterns', 
                     fontsize=13, fontweight='bold')
axes[1, 0].legend(loc='best', fontsize=9)
axes[1, 0].grid(True, alpha=0.3)
axes[1, 0].tick_params(axis='x', rotation=45)

# Chart 4: Anomaly Summary
anomaly_count = daily_records['Is_Anomaly'].sum()
normal_count = (~daily_records['Is_Anomaly']).sum()

categories = ['Normal\nActivity', 'Suspicious\nActivity']
counts = [normal_count, anomaly_count]
colors_pie = ['#27ae60', '#e74c3c']

wedges, texts, autotexts = axes[1, 1].pie(counts, labels=categories, 
                                           colors=colors_pie, autopct='%1.1f%%',
                                           startangle=90, explode=(0, 0.1),
                                           textprops={'fontsize': 11, 'fontweight': 'bold'})

axes[1, 1].set_title(f'Security Incident Summary\n({anomaly_count} Anomalies Detected)', 
                     fontsize=13, fontweight='bold')

# Add summary text
summary_text = f'Total Days Analyzed: {len(daily_records)}\n'
summary_text += f'Normal Days: {normal_count}\n'
summary_text += f'Anomalous Days: {anomaly_count}\n'
if anomaly_count > 0:
    summary_text += f'\n⚠️ INVESTIGATION REQUIRED'
else:
    summary_text += f'\n✅ NO THREATS DETECTED'

axes[1, 1].text(0, -1.4, summary_text, fontsize=10, ha='center',
                bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

plt.tight_layout()
plt.savefig('ndpr_batch3_dpia_breach_analysis.png', dpi=300, bbox_inches='tight')
plt.show()
print("✓ Chart saved: ndpr_batch3_dpia_breach_analysis.png")

# ===========================================================================
# CHART 5: COMPLIANCE SCORECARD
# ===========================================================================

fig, ax = plt.subplots(figsize=(12, 8))

# Define compliance metrics
compliance_metrics = [
    'PII Detection',
    'Data Minimization',
    'Retention Policy',
    'Access Control',
    'Encryption at Rest',
    'Encryption in Transit',
    'User Consent',
    'DPIA Completed',
    'Breach Response Plan',
    'Audit Logging'
]

# Simulate compliance scores (in real scenario, these would be actual checks)
# For this analysis, we'll mark what can be verified from data
compliance_status = []
compliance_colors = []

# PII Detection - can verify
pii_found = False  # Based on earlier analysis
compliance_status.append('PASS' if not pii_found else 'REVIEW NEEDED')
compliance_colors.append('#27ae60' if not pii_found else '#f39c12')

# Data Minimization - can verify
minimal_columns = len(df.columns) <= 5
compliance_status.append('PASS' if minimal_columns else 'REVIEW NEEDED')
compliance_colors.append('#27ae60' if minimal_columns else '#f39c12')

# Retention Policy - can verify
retention_ok = data_age <= 365
compliance_status.append('PASS' if retention_ok else 'FAIL')
compliance_colors.append('#27ae60' if retention_ok else '#e74c3c')

# Others require manual verification
manual_checks = ['MANUAL CHECK'] * 7
compliance_status.extend(manual_checks)
compliance_colors.extend(['#95a5a6'] * 7)

# Create horizontal bar chart
y_pos = np.arange(len(compliance_metrics))
ax.barh(y_pos, [1]*len(compliance_metrics), color=compliance_colors, 
        alpha=0.8, edgecolor='black')

ax.set_yticks(y_pos)
ax.set_yticklabels(compliance_metrics, fontsize=11)
ax.set_xlim(0, 1.5)
ax.set_xticks([])
ax.set_title('NDPR Compliance Scorecard', fontsize=15, fontweight='bold', pad=20)

# Add status labels
for i, (metric, status, color) in enumerate(zip(compliance_metrics, compliance_status, compliance_colors)):
    ax.text(1.05, i, status, va='center', fontsize=10, fontweight='bold',
            bbox=dict(boxstyle='round', facecolor='white', edgecolor=color, linewidth=2))

# Add legend
from matplotlib.patches import Patch
legend_elements = [
    Patch(facecolor='#27ae60', label='PASS', alpha=0.8),
    Patch(facecolor='#f39c12', label='REVIEW NEEDED', alpha=0.8),
    Patch(facecolor='#e74c3c', label='FAIL', alpha=0.8),
    Patch(facecolor='#95a5a6', label='MANUAL CHECK REQUIRED', alpha=0.8)
]
ax.legend(handles=legend_elements, loc='upper right', fontsize=10)

# Add summary box
pass_count = compliance_status.count('PASS')
fail_count = compliance_status.count('FAIL')
review_count = compliance_status.count('REVIEW NEEDED')
manual_count = compliance_status.count('MANUAL CHECK')

summary = f'Compliance Summary:\n'
summary += f'✓ Pass: {pass_count}/{len(compliance_metrics)}\n'
summary += f'⚠ Review: {review_count}/{len(compliance_metrics)}\n'
summary += f'✗ Fail: {fail_count}/{len(compliance_metrics)}\n'
summary += f'⊙ Manual: {manual_count}/{len(compliance_metrics)}'

ax.text(0.02, len(compliance_metrics) - 1, summary, fontsize=10,
        bbox=dict(boxstyle='round', facecolor='lightyellow', alpha=0.8),
        verticalalignment='top')

plt.tight_layout()
plt.savefig('ndpr_batch3_compliance_scorecard.png', dpi=300, bbox_inches='tight')
plt.show()
print("✓ Chart saved: ndpr_batch3_compliance_scorecard.png")

print("\n" + "="*80)
print("NDPR BATCH 3 CHARTS COMPLETE")
print("="*80)
print(f"\nGenerated Charts:")
print(f"  1. ndpr_batch3_dpia_breach_analysis.png")
print(f"     - DPIA risk assessment by factor")
print(f"     - Overall risk gauge")
print(f"     - Breach detection with anomalies")
print(f"     - Security incident summary")
print(f"\n  2. ndpr_batch3_compliance_scorecard.png")
print(f"     - Complete NDPR compliance checklist")
print(f"     - Automated and manual verification status")
No description has been provided for this image
✓ Chart saved: ndpr_batch3_dpia_breach_analysis.png
No description has been provided for this image
✓ Chart saved: ndpr_batch3_compliance_scorecard.png

================================================================================
NDPR BATCH 3 CHARTS COMPLETE
================================================================================

Generated Charts:
  1. ndpr_batch3_dpia_breach_analysis.png
     - DPIA risk assessment by factor
     - Overall risk gauge
     - Breach detection with anomalies
     - Security incident summary

  2. ndpr_batch3_compliance_scorecard.png
     - Complete NDPR compliance checklist
     - Automated and manual verification status
In [ ]: